[sword-cvs] icu-sword/source/common/unicode symtable.h,NONE,1.1 usprep.h,NONE,1.1 utrace.h,NONE,1.1 brkiter.h,1.1,1.2 caniter.h,1.1,1.2 chariter.h,1.4,1.5 dbbi.h,1.1,1.2 locid.h,1.4,1.5 normlzr.h,1.5,1.6 parsepos.h,1.1,1.2 platform.h.in,1.5,1.6 pos400.h,1.3,1.4 putil.h,1.4,1.5 pwin32.h,1.5,1.6 rbbi.h,1.1,1.2 rep.h,1.4,1.5 resbund.h,1.4,1.5 schriter.h,1.4,1.5 strenum.h,1.1,1.2 ubrk.h,1.1,1.2 uchar.h,1.5,1.6 uchriter.h,1.4,1.5 uclean.h,1.4,1.5 ucnv.h,1.4,1.5 ucnv_err.h,1.4,1.5 uenum.h,1.1,1.2 uidna.h,1.1,1.2 uiter.h,1.1,1.2 uloc.h,1.4,1.5 umachine.h,1.4,1.5 umisc.h,1.3,1.4 unifilt.h,1.1,1.2 unifunct.h,1.1,1.2 unimatch.h,1.1,1.2 uniset.h,1.1,1.2 unistr.h,1.5,1.6 unorm.h,1.4,1.5 uobject.h,1.1,1.2 urename.h,1.5,1.6 ures.h,1.5,1.6 uscript.h,1.5,1.6 uset.h,1.1,1.2 usetiter.h,1.1,1.2 ustring.h,1.4,1.5 utf.h,1.4,1.5 utf16.h,1.3,1.4 utf8.h,1.4,1.5 utypes.h,1.9,1.10 uversion.h,1.5,1.6
sword@www.crosswire.org
sword@www.crosswire.org
Tue, 6 Apr 2004 03:11:19 -0700
- Previous message: [sword-cvs] icu-sword/source/layout AnchorTables.cpp,1.3,1.4 ArabicLayoutEngine.cpp,1.3,1.4 ArabicLayoutEngine.h,1.3,1.4 ArabicShaping.cpp,1.3,1.4 ClassDefinitionTables.cpp,1.3,1.4 ContextualGlyphSubstProc.h,1.3,1.4 ContextualSubstSubtables.cpp,1.3,1.4 ContextualSubstSubtables.h,1.3,1.4 DefaultCharMapper.h,1.3,1.4 GXLayoutEngine.cpp,1.3,1.4 GXLayoutEngine.h,1.3,1.4 GlyphIterator.cpp,1.3,1.4 GlyphIterator.h,1.3,1.4 GlyphPositioningTables.cpp,1.3,1.4 GlyphSubstitutionTables.cpp,1.3,1.4 GlyphSubstitutionTables.h,1.3,1.4 HanLayoutEngine.cpp,1.1,1.2 HanLayoutEngine.h,1.1,1.2 IndicLayoutEngine.cpp,1.3,1.4 IndicLayoutEngine.h,1.3,1.4 IndicRearrangementProcessor.cpp,1.3,1.4 IndicRearrangementProcessor.h,1.3,1.4 IndicReordering.cpp,1.3,1.4 IndicReordering.h,1.3,1.4 LEFontInstance.h,1.3,1.4 LEGlyphFilter.h,1.3,1.4 LELanguages.h,1.1,1.2 LEScripts.h,1.3,1.4 LESwaps.h,1.3,1.4 LETypes.h,1.3,1.4 LayoutEngine.cpp,1.3,1.4 LayoutEngine.h,1.3,1.4 LigatureSubstProc.cpp,1.3,1.4 LigatureSubstProc.h,1.3,1.4 LookupProcessor.cpp,1.3,1.4 LookupProcessor.h,1.3,1.4 MPreFixups.cpp,1.1,1.2 MPreFixups.h,1.1,1.2 Makefile.in,1.4,1.5 MarkToBasePosnSubtables.cpp,1.3,1.4 MarkToMarkPosnSubtables.cpp,1.3,1.4 MultipleSubstSubtables.cpp,1.3,1.4 OpenTypeLayoutEngine.cpp,1.4,1.5 OpenTypeLayoutEngine.h,1.3,1.4 ScriptAndLanguage.h,1.3,1.4 ScriptAndLanguageTags.cpp,1.3,1.4 ScriptAndLanguageTags.h,1.3,1.4 SegmentArrayProcessor.h,1.3,1.4 SegmentSingleProcessor.h,1.3,1.4 SimpleArrayProcessor.h,1.3,1.4 SingleTableProcessor.h,1.3,1.4 ThaiLayoutEngine.cpp,1.3,1.4 ThaiLayoutEngine.h,1.3,1.4 ThaiShaping.cpp,1.4,1.5 ThaiShaping.h,1.3,1.4 ThaiStateTables.cpp,1.3,1.4 TrimmedArrayProcessor.h,1.3,1.4 layout.dsp,1.4,1.5 layout.rc,1.3,1.4 layout.vcproj,1.1,1.2
- Next message: [sword-cvs] icu-sword/source/test/testdata conversion.txt,NONE,1.1 nfs4_cis_prep.txt,NONE,1.1 nfs4_cs_prep_ci.txt,NONE,1.1 nfs4_cs_prep_cs.txt,NONE,1.1 nfs4_mixed_prep_p.txt,NONE,1.1 nfs4_mixed_prep_s.txt,NONE,1.1 ra.txt,NONE,1.1 riwords.txt,NONE,1.1 test4x.ucm,NONE,1.1 CollationTest_NON_IGNORABLE_STUB.txt,1.1,1.2 CollationTest_SHIFTED_STUB.txt,1.1,1.2 DataDrivenCollationTest.txt,1.1,1.2 idna_rules.txt,1.1,1.2 rbbitst.txt,1.1,1.2 regextst.txt,1.1,1.2 te.txt,1.3,1.4 test1.ucm,1.3,1.4 test3.ucm,1.3,1.4 test4.ucm,1.3,1.4 testaliases.txt,1.1,1.2 testdata.mk,1.4,1.5 testtypes.txt,1.4,1.5 translit_rules.txt,1.1,1.2 th18057.txt,1.5,NONE
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvs/core/icu-sword/source/common/unicode
In directory www:/tmp/cvs-serv8911/source/common/unicode
Modified Files:
brkiter.h caniter.h chariter.h dbbi.h locid.h normlzr.h
parsepos.h platform.h.in pos400.h putil.h pwin32.h rbbi.h
rep.h resbund.h schriter.h strenum.h ubrk.h uchar.h uchriter.h
uclean.h ucnv.h ucnv_err.h uenum.h uidna.h uiter.h uloc.h
umachine.h umisc.h unifilt.h unifunct.h unimatch.h uniset.h
unistr.h unorm.h uobject.h urename.h ures.h uscript.h uset.h
usetiter.h ustring.h utf.h utf16.h utf8.h utypes.h uversion.h
Added Files:
symtable.h usprep.h utrace.h
Log Message:
ICU 2.8 sync
--- NEW FILE: symtable.h ---
/*
**********************************************************************
* Copyright (c) 2000-2003, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 02/04/00 aliu Creation.
**********************************************************************
*/
#ifndef SYMTABLE_H
#define SYMTABLE_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
U_NAMESPACE_BEGIN
class ParsePosition;
class UnicodeFunctor;
class UnicodeSet;
class UnicodeString;
/**
* An interface that defines both lookup protocol and parsing of
* symbolic names.
*
* <p>A symbol table maintains two kinds of mappings. The first is
* between symbolic names and their values. For example, if the
* variable with the name "start" is set to the value "alpha"
* (perhaps, though not necessarily, through an expression such as
* "$start=alpha"), then the call lookup("start") will return a
* pointer to a UnicodeString containing "alpha".
*
* <p>The second kind of mapping is between character values and
* UnicodeMatcher objects. This is used by RuleBasedTransliterator,
* which uses characters in the private use area to represent objects
* such as UnicodeSets. If U+E015 is mapped to the UnicodeSet [a-z],
* then lookupMatcher(0xE015) will return the UnicodeSet [a-z].
*
* <p>Finally, a symbol table defines parsing behavior for symbolic
* names. All symbolic names start with the SYMBOL_REF character.
* When a parser encounters this character, it calls parseReference()
* with the position immediately following the SYMBOL_REF. The symbol
* table parses the name, if there is one, and returns it.
*
* <p>All three operations are pure virtual; this class carries no
* implementation of its own.
*
* @draft ICU 2.8
*/
class U_COMMON_API SymbolTable /* not : public UObject because this is an interface/mixin class */ {
public:
/**
* The character preceding a symbol reference name:
* U+0024, the dollar sign ('$').
*/
enum { SYMBOL_REF = 0x0024 /*$*/ };
/**
* Destructor. Declared virtual so that an implementation can be
* destroyed through a pointer to this interface.
*/
virtual ~SymbolTable();
/**
* Look up the characters associated with the given symbolic name
* and return them. Return <tt>NULL</tt> if no such name exists.
* The resultant string may have length zero.
* @param s the symbolic name to look up
* @return a string containing the name's value, or <tt>NULL</tt> if
* there is no mapping for s.
*/
virtual const UnicodeString* lookup(const UnicodeString& s) const = 0;
/**
* Look up the UnicodeMatcher associated with the given character, and
* return it. Return <tt>NULL</tt> if not found.
* @param ch a 32-bit code point from 0 to 0x10FFFF inclusive.
* @return the UnicodeMatcher object represented by the given
* character (typed as a UnicodeFunctor pointer), or NULL if there is
* no mapping for ch.
*/
virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const = 0;
/**
* Parse a symbol reference name from the given string, starting
* at the given position. If no valid symbol reference name is
* found, return the empty string and leave pos unchanged. That is, if the
* character at pos cannot start a name, or if pos is at or after
* text.length(), then return an empty string. This indicates an
* isolated SYMBOL_REF character.
* @param text the text to parse for the name
* @param pos on entry, the index of the first character to parse.
* This is the character following the SYMBOL_REF character. On
* exit, the index after the last parsed character. If the parse
* failed, pos is unchanged on exit.
* @param limit the index after the last character to be parsed.
* @return the parsed name, or an empty string if there is no
* valid symbolic name at the given position.
*/
virtual UnicodeString parseReference(const UnicodeString& text,
ParsePosition& pos, int32_t limit) const = 0;
};
U_NAMESPACE_END
#endif
--- NEW FILE: usprep.h ---
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: usprep.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003jul2
* created by: Ram Viswanadha
*/
#ifndef __USPREP_H__
#define __USPREP_H__
#include "unicode/utypes.h"
/**
*\file
* StringPrep API implements the StringPrep framework as described by RFC 3454.
* StringPrep prepares Unicode strings for use in network protocols.
* Profiles of StringPrep are sets of rules and data according to which the
* Unicode strings are prepared. Each profile contains tables which describe
* how a code point should be treated. The tables are broadly classified into
* <ul>
* <li> Unassigned Table: Contains code points that are unassigned
* in the Unicode Version supported by StringPrep. Currently
* RFC 3454 supports Unicode 3.2. </li>
* <li> Prohibited Table: Contains code points that are prohibited from
* the output of the StringPrep processing function. </li>
* <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
* </ul>
*
* The procedure for preparing Unicode strings:
* <ol>
* <li> Map: For each character in the input, check if it has a mapping
* and, if so, replace it with its mapping. </li>
* <li> Normalize: Possibly normalize the result of step 1 using Unicode
* normalization. </li>
* <li> Prohibit: Check for any characters that are not allowed in the
* output. If any are found, return an error.</li>
* <li> Check bidi: Possibly check for right-to-left characters, and if
* any are found, make sure that the whole string satisfies the
* requirements for bidirectional strings. If the string does not
* satisfy the requirements for bidirectional strings, return an
* error. </li>
* </ol>
* @author Ram Viswanadha
*/
#if !UCONFIG_NO_IDNA
#include "unicode/parseerr.h"
/**
* The StringPrep profile
* @draft ICU 2.8
*/
typedef struct UStringPrepProfile UStringPrepProfile;
/**
* Option to prohibit processing of unassigned code points in the input
*
* @see usprep_prepare
* @draft ICU 2.8
*/
#define USPREP_DEFAULT 0x0000
/**
* Option to allow processing of unassigned code points in the input
*
* @see usprep_prepare
* @draft ICU 2.8
*/
#define USPREP_ALLOW_UNASSIGNED 0x0001
/**
* Creates a StringPrep profile from the data file.
*
* @param path string containing the full path pointing to the directory
* where the profile resides, followed by the package name,
* e.g. "/usr/resource/my_app/profiles/mydata" on a Unix system.
* If NULL, ICU default data files will be used.
* @param fileName name of the profile file to be opened
* @param status ICU error code in/out parameter. Must not be NULL.
* Must fulfill U_SUCCESS before the function call.
* @return Pointer to the UStringPrepProfile that is opened. Should be closed by
* calling usprep_close()
* @see usprep_close()
* @draft ICU 2.8
*/
U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char* path,
const char* fileName,
UErrorCode* status);
/**
* Closes the profile
* @param profile The profile to close
* @draft ICU 2.8
*/
U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile* profile);
/**
* Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes(NFKC),
* checks for prohibited and BiDi characters in the order defined by RFC 3454
* depending on the options specified in the profile.
*
* @param prep The profile to use
* @param src Pointer to UChar buffer containing the string to prepare
* @param srcLength Number of characters in the source string
* @param dest Pointer to the destination buffer to receive the output
* @param destCapacity The capacity of destination array
* @param options A bit set of options:
*
* - USPREP_DEFAULT Prohibit processing of unassigned code points in the input
*
* - USPREP_ALLOW_UNASSIGNED Treat the unassigned code points in the input
* as normal Unicode code points.
*
* @param parseError Pointer to UParseError struct to receive information on position
* of error if an error is encountered. Can be NULL.
* @param status ICU in/out error code parameter.
* U_INVALID_CHAR_FOUND if src contains
* unmatched single surrogates.
* U_INDEX_OUTOFBOUNDS_ERROR if src contains
* too many code points.
* U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
* @return The number of UChars in the destination buffer
* @draft ICU 2.8
*/
U_CAPI int32_t U_EXPORT2
usprep_prepare( const UStringPrepProfile* prep,
const UChar* src, int32_t srcLength,
UChar* dest, int32_t destCapacity,
int32_t options,
UParseError* parseError,
UErrorCode* status );
#endif /* #if !UCONFIG_NO_IDNA */
#endif
--- NEW FILE: utrace.h ---
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: utrace.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003aug06
* created by: Markus W. Scherer
*
* Definitions for ICU tracing/logging.
*
*/
#ifndef __UTRACE_H__
#define __UTRACE_H__
#include <stdarg.h>
#include "unicode/utypes.h"
U_CDECL_BEGIN
/**
* Trace severity levels. Higher levels increase the verbosity of the trace output.
* The numeric values are ordered by verbosity (UTRACE_OFF is -1, the
* most verbose level is 9), with gaps left between adjacent levels.
* @see utrace_setLevel
* @draft ICU 2.8
*/
typedef enum UTraceLevel {
/** Disable all tracing @draft ICU 2.8*/
UTRACE_OFF=-1,
/** Trace error conditions only @draft ICU 2.8*/
UTRACE_ERROR=0,
/** Trace errors and warnings @draft ICU 2.8*/
UTRACE_WARNING=3,
/** Trace opens and closes of ICU services @draft ICU 2.8*/
UTRACE_OPEN_CLOSE=5,
/** Trace an intermediate number of ICU operations @draft ICU 2.8*/
UTRACE_INFO=7,
/** Trace the maximum number of ICU operations @draft ICU 2.8*/
UTRACE_VERBOSE=9
} UTraceLevel;
/**
* Setter for the trace level.
* @param traceLevel A UTraceLevel value.
* @draft ICU 2.8
*/
U_CAPI void U_EXPORT2
utrace_setLevel(int32_t traceLevel);
/**
* Getter for the trace level.
* @return The current trace level, as a UTraceLevel value.
* @see utrace_setLevel
* @draft ICU 2.8
*/
U_CAPI int32_t U_EXPORT2
utrace_getLevel(void);
/* Trace function pointers types ----------------------------- */
/**
* Type signature for the trace function to be called when entering a function.
* @param context value supplied at the time the trace functions are set.
* @param fnNumber Enum value indicating the ICU function being entered.
* @draft ICU 2.8
*/
typedef void U_CALLCONV
UTraceEntry(const void *context, int32_t fnNumber);
/**
* Type signature for the trace function to be called when exiting from a function.
* @param context value supplied at the time the trace functions are set.
* @param fnNumber Enum value indicating the ICU function being exited.
* @param fmt A formatting string that describes the number and types
* of arguments included with the variable args. The fmt
* string has the same form as the utrace_vformat format
* string.
* @param args A variable arguments list. Contents are described by
* the fmt parameter.
* @see utrace_vformat
* @draft ICU 2.8
*/
typedef void U_CALLCONV
UTraceExit(const void *context, int32_t fnNumber,
const char *fmt, va_list args);
/**
* Type signature for the trace function to be called from within an ICU function
* to display data or messages.
* @param context value supplied at the time the trace functions are set.
* @param fnNumber Enum value indicating the ICU function from within
* which the trace function is being called.
* @param level The current tracing level
* @param fmt A format string describing the tracing data that is supplied
* as variable args. The format string syntax is the one
* accepted by utrace_vformat.
* @param args The data being traced, passed as variable args.
* @see utrace_vformat
* @draft ICU 2.8
*/
typedef void U_CALLCONV
UTraceData(const void *context, int32_t fnNumber, int32_t level,
const char *fmt, va_list args);
/**
* Set ICU Tracing functions. Installs application-provided tracing
* functions into ICU. After doing this, subsequent ICU operations
* will call back to the installed functions, providing a trace
* of the use of ICU. Passing a NULL pointer for a tracing function
* is allowed, and inhibits tracing action at points where that function
* would be called.
* <p>
* Tracing and Threads: Tracing functions are global to a process, and
* will be called in response to ICU operations performed by any
* thread. If tracing of an individual thread is desired, the
* tracing functions must themselves filter by checking that the
* current thread is the desired thread.
*
* @param context an uninterpreted pointer. Whatever is passed in
* here will in turn be passed to each of the tracing
* functions UTraceEntry, UTraceExit and UTraceData.
* ICU does not use or alter this pointer.
* @param e Callback function to be called on entry to a
* traced ICU function.
* @param x Callback function to be called on exit from a
* traced ICU function.
* @param d Callback function to be called from within a
* traced ICU function, for the purpose of providing
* data to the trace.
*
* @draft ICU 2.8
*/
U_CAPI void U_EXPORT2
utrace_setFunctions(const void *context,
UTraceEntry *e, UTraceExit *x, UTraceData *d);
/**
* Get the currently installed ICU tracing functions. Note that a null function
* pointer will be returned if no trace function has been set.
*
* @param context The currently installed tracing context.
* @param e The currently installed UTraceEntry function.
* @param x The currently installed UTraceExit function.
* @param d The currently installed UTraceData function.
* @draft ICU 2.8
*/
U_CAPI void U_EXPORT2
utrace_getFunctions(const void **context,
UTraceEntry **e, UTraceExit **x, UTraceData **d);
/*
*
* ICU trace format string syntax
*
* Format Strings are passed to UTraceData functions, and define the
* number and types of the trace data being passed on each call.
*
* The UTraceData function, which is supplied by the application,
* not by ICU, can either forward the trace data (passed via
* varargs) and the format string back to ICU for formatting into
* a displayable string, or it can interpret the format itself,
* and do as it wishes with the trace data.
*
*
* Goals for the format string
* - basic data output
* - easy to use for trace programmer
* - sufficient provision for data types for trace output readability
* - well-defined types and binary portable APIs
*
* Non-goals
* - printf compatibility
* - fancy formatting
* - argument reordering and other internationalization features
*
* ICU trace format strings contain plain text with argument inserts,
* much like standard printf format strings.
* Each insert begins with a '%', then optionally contains a 'v',
* then exactly one type character.
* Two '%' in a row represent a '%' instead of an insert.
* The trace format strings need not have \n at the end.
*
*
* Types
* -----
*
* Type characters:
* - c A char character in the default codepage.
* - s A NUL-terminated char * string in the default codepage.
* - S A UChar * string. Requires two params, (ptr, length). Length=-1 for nul term.
* - b A byte (8-bit integer).
* - h A 16-bit integer. Also a 16 bit Unicode code unit.
* - d A 32-bit integer. Also a 21 bit Unicode code point value.
* - l A 64-bit integer.
* - p A data pointer.
*
* Vectors
* -------
*
* If the 'v' is not specified, then one item of the specified type
* is passed in.
* If the 'v' (for "vector") is specified, then a vector of items of the
* specified type is passed in, via a pointer to the first item
* and an int32_t value for the length of the vector.
* Length==-1 means zero or NUL termination. Works for vectors of all types.
*
* Note: %vS is a vector of (UChar *) strings. The strings must
* be nul terminated as there is no way to provide a
* separate length parameter for each string. The length
* parameter (required for all vectors) is the number of
* strings, not the length of the strings.
*
* Examples
* --------
*
* These examples show the parameters that will be passed to an application's
* UTraceData() function for various formats.
*
* - the precise formatting is up to the application!
* - the examples use type casts for arguments only to _show_ the types of
* arguments without needing variable declarations in the examples;
* the type casts will not be necessary in actual code
*
* UTraceDataFunc(context, fnNumber, level,
* "There is a character %c in the string %s.", // Format String
* (char)c, (const char *)s); // varargs parameters
* -> There is a character 0x42 'B' in the string "Bravo".
*
* UTraceDataFunc(context, fnNumber, level,
* "Vector of bytes %vb vector of chars %vc",
* (const uint8_t *)bytes, (int32_t)bytesLength,
* (const char *)chars, (int32_t)charsLength);
* -> Vector of bytes
* 42 63 64 3f [4]
* vector of chars
* "Bcd?"[4]
*
* UTraceDataFunc(context, fnNumber, level,
* "An int32_t %d and a whole bunch of them %vd",
* (int32_t)-5, (const int32_t *)ints, (int32_t)intsLength);
* -> An int32_t 0xfffffffb and a whole bunch of them
* fffffffb 00000005 0000010a [3]
*
*/
/**
* Trace output Formatter. An application's UTraceData tracing functions may call
* back to this function to format the trace output in a
* human readable form. Note that a UTraceData function may choose
* to not format the data; it could, for example, save it
* in the raw form it was received (more compact), leaving
* formatting for a later trace analysis tool.
* @param outBuf pointer to a buffer to receive the formatted output. Output
* will be nul terminated if there is space in the buffer -
* if the length of the requested output < the output buffer size.
* @param capacity Length of the output buffer.
* @param indent Number of spaces to indent the output. Intended to allow
* data displayed from nested functions to be indented for readability.
* @param fmt Format specification for the data to output
* @param args Data to be formatted.
* @return Length of formatted output, including the terminating NUL.
* If buffer capacity is insufficient, the required capacity is returned.
* @draft ICU 2.8
*/
U_CAPI int32_t U_EXPORT2
utrace_vformat(char *outBuf, int32_t capacity,
int32_t indent, const char *fmt, va_list args);
/**
* Trace output Formatter. An application's UTraceData tracing functions may call
* this function to format any additional trace data, beyond that
* provided by default, in human readable form with the same
* formatting conventions used by utrace_vformat().
* @param outBuf pointer to a buffer to receive the formatted output. Output
* will be nul terminated if there is space in the buffer -
* if the length of the requested output < the output buffer size.
* @param capacity Length of the output buffer.
* @param indent Number of spaces to indent the output. Intended to allow
* data displayed from nested functions to be indented for readability.
* @param fmt Format specification for the data to output
* @param ... Data to be formatted.
* @return Length of formatted output, including the terminating NUL.
* If buffer capacity is insufficient, the required capacity is returned.
* @draft ICU 2.8
*/
U_CAPI int32_t U_EXPORT2
utrace_format(char *outBuf, int32_t capacity,
int32_t indent, const char *fmt, ...);
/* Trace function numbers --------------------------------------------------- */
/**
* Get the name of a function from its trace function number.
*
* @param fnNumber The trace number for an ICU function.
* @return The name string for the function.
*
* @see UTraceFunctionNumber
* @draft ICU 2.8
*/
U_CAPI const char * U_EXPORT2
utrace_functionName(int32_t fnNumber);
/**
* These are the ICU functions that will be traced when tracing is enabled.
*
* The numbers are organized in groups, each starting at its own base
* value (0, 0x1000, 0x2000). Within a group, the *_START constant has
* the same value as the first function number, and the *_LIMIT constant
* immediately follows the last function number of the group.
*
* @see utrace_functionName
* @draft ICU 2.8
*/
typedef enum UTraceFunctionNumber {
/* General functions: numbering starts at 0. */
UTRACE_FUNCTION_START=0,
UTRACE_U_INIT=UTRACE_FUNCTION_START,
UTRACE_U_CLEANUP,
UTRACE_FUNCTION_LIMIT,
/* Conversion functions: numbering starts at 0x1000. */
UTRACE_CONVERSION_START=0x1000,
UTRACE_UCNV_OPEN=UTRACE_CONVERSION_START,
UTRACE_UCNV_OPEN_PACKAGE,
UTRACE_UCNV_OPEN_ALGORITHMIC,
UTRACE_UCNV_CLONE,
UTRACE_UCNV_CLOSE,
UTRACE_UCNV_FLUSH_CACHE,
UTRACE_UCNV_LOAD,
UTRACE_UCNV_UNLOAD,
UTRACE_CONVERSION_LIMIT,
/* Collation functions: numbering starts at 0x2000. */
UTRACE_COLLATION_START=0x2000,
UTRACE_UCOL_OPEN=UTRACE_COLLATION_START,
UTRACE_UCOL_CLOSE,
UTRACE_UCOL_STRCOLL,
UTRACE_UCOL_GET_SORTKEY,
UTRACE_UCOL_GETLOCALE,
UTRACE_UCOL_NEXTSORTKEYPART,
UTRACE_UCOL_STRCOLLITER,
UTRACE_COLLATION_LIMIT
} UTraceFunctionNumber;
U_CDECL_END
#endif
Index: brkiter.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/brkiter.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- brkiter.h 10 Sep 2003 02:42:04 -0000 1.1
+++ brkiter.h 6 Apr 2004 10:08:07 -0000 1.2
@@ -270,7 +270,11 @@
* boundaries have been returned.
* @stable ICU 2.0
*/
+#ifdef U_CYGWIN
+ static U_COMMON_API const int32_t DONE;
+#else
static const int32_t DONE;
+#endif
/**
* Return the index of the first character in the text being scanned.
@@ -527,7 +531,7 @@
/**
* Register a new break iterator of the indicated kind, to use in the given locale.
- * The break iterator will be adoped. Clones of the iterator will be returned
+ * The break iterator will be adopted. Clones of the iterator will be returned
* if a request for a break iterator of the given kind matches or falls back to
* this locale.
* @param toAdopt the BreakIterator instance to be adopted
@@ -558,6 +562,21 @@
*/
static StringEnumeration* getAvailableLocales(void);
+ /**
+ * Returns the locale for this break iterator. Two flavors are available: valid and
+ * actual locale.
+ * @draft ICU 2.8 likely to change in ICU 3.0, based on feedback
+ */
+ Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
+
+ /** Get the locale for this break iterator object. You can choose between valid and actual locale.
+ * @param type type of the locale we're looking for (valid or actual)
+ * @param status error code for the operation
+ * @return the locale
+ * @internal
+ */
+ const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
+
private:
static BreakIterator* makeCharacterInstance(const Locale& loc, UErrorCode& status);
static BreakIterator* makeWordInstance(const Locale& loc, UErrorCode& status);
@@ -578,7 +597,13 @@
UBool fBufferClone;
/** @internal */
BreakIterator (const BreakIterator &other) : UObject(other), fBufferClone(FALSE) {}
+
private:
+
+ /** @internal */
+ char actualLocale[ULOC_FULLNAME_CAPACITY];
+ char validLocale[ULOC_FULLNAME_CAPACITY];
+
/**
* The assignment operator has no real implementation.
* It's provided to make the compiler happy. Do not call.
Index: caniter.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/caniter.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- caniter.h 10 Sep 2003 02:42:04 -0000 1.1
+++ caniter.h 6 Apr 2004 10:08:07 -0000 1.2
@@ -121,18 +121,18 @@
static void permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
/**
- * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * ICU "poor man's RTTI", returns a UClassID for this class.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
- virtual inline UClassID getDynamicClassID() const;
+ static UClassID getStaticClassID();
/**
- * ICU "poor man's RTTI", returns a UClassID for this class.
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
- static inline UClassID getStaticClassID();
+ virtual UClassID getDynamicClassID() const;
private:
// ===================== PRIVATES ==============================
@@ -187,20 +187,7 @@
void cleanPieces();
- /**
- * The address of this static class variable serves as this class's ID
- * for ICU "poor man's RTTI".
- */
- static const char fgClassID;
};
-
-inline UClassID
-CanonicalIterator::getStaticClassID()
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-CanonicalIterator::getDynamicClassID() const
-{ return CanonicalIterator::getStaticClassID(); }
U_NAMESPACE_END
Index: chariter.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/chariter.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- chariter.h 10 Sep 2003 02:42:04 -0000 1.4
+++ chariter.h 6 Apr 2004 10:08:07 -0000 1.5
@@ -1,7 +1,7 @@
/*
********************************************************************
*
-* Copyright (C) 1997-2002, International Business Machines
+* Copyright (C) 1997-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
********************************************************************
@@ -84,100 +84,100 @@
*/
class U_COMMON_API ForwardCharacterIterator : public UObject {
public:
- /**
- * Value returned by most of ForwardCharacterIterator's functions
- * when the iterator has reached the limits of its iteration.
- * @stable ICU 2.0
- */
- enum { DONE = 0xffff };
-
- /**
- * Destructor.
- * @stable ICU 2.0
- */
- virtual ~ForwardCharacterIterator() {}
-
- /**
- * Returns true when both iterators refer to the same
- * character in the same character-storage object.
- * @param that The ForwardCharacterIterator to be compared for equality
- * @return true when both iterators refer to the same
- * character in the same character-storage object
- * @stable ICU 2.0
- */
- virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;
-
- /**
- * Returns true when the iterators refer to different
- * text-storage objects, or to different characters in the
- * same text-storage object.
- * @param that The ForwardCharacterIterator to be compared for inequality
- * @Returns true when the iterators refer to different
- * text-storage objects, or to different characters in the
- * same text-storage object
- * @stable ICU 2.0
- */
- inline UBool operator!=(const ForwardCharacterIterator& that) const;
-
- /**
- * Generates a hash code for this iterator.
- * @return the hash code.
- * @stable ICU 2.0
- */
- virtual int32_t hashCode(void) const = 0;
-
- /**
- * Returns a UClassID for this ForwardCharacterIterator ("poor man's
- * RTTI").<P> Despite the fact that this function is public,
- * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API!
- * @Returns a UClassID for this ForwardCharacterIterator
- * @stable ICU 2.0
- */
- virtual UClassID getDynamicClassID(void) const = 0;
-
- /**
- * Gets the current code unit for returning and advances to the next code unit
- * in the iteration range
- * (toward endIndex()). If there are
- * no more code units to return, returns DONE.
- * @return the current code unit.
- * @stable ICU 2.0
- */
- virtual UChar nextPostInc(void) = 0;
-
- /**
- * Gets the current code point for returning and advances to the next code point
- * in the iteration range
- * (toward endIndex()). If there are
- * no more code points to return, returns DONE.
- * @return the current code point.
- * @stable ICU 2.0
- */
- virtual UChar32 next32PostInc(void) = 0;
-
- /**
- * Returns FALSE if there are no more code units or code points
- * at or after the current position in the iteration range.
- * This is used with nextPostInc() or next32PostInc() in forward
- * iteration.
- * @returns FALSE if there are no more code units or code points
- * at or after the current position in the iteration range.
- * @stable ICU 2.0
- */
- virtual UBool hasNext() = 0;
-
+ /**
+ * Value returned by most of ForwardCharacterIterator's functions
+ * when the iterator has reached the limits of its iteration.
+ * @stable ICU 2.0
+ */
+ enum { DONE = 0xffff };
+
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~ForwardCharacterIterator();
+
+ /**
+ * Returns true when both iterators refer to the same
+ * character in the same character-storage object.
+ * @param that The ForwardCharacterIterator to be compared for equality
+ * @return true when both iterators refer to the same
+ * character in the same character-storage object
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;
+
+ /**
+ * Returns true when the iterators refer to different
+ * text-storage objects, or to different characters in the
+ * same text-storage object.
+ * @param that The ForwardCharacterIterator to be compared for inequality
+ * @Returns true when the iterators refer to different
+ * text-storage objects, or to different characters in the
+ * same text-storage object
+ * @stable ICU 2.0
+ */
+ inline UBool operator!=(const ForwardCharacterIterator& that) const;
+
+ /**
+ * Generates a hash code for this iterator.
+ * @return the hash code.
+ * @stable ICU 2.0
+ */
+ virtual int32_t hashCode(void) const = 0;
+
+ /**
+ * Returns a UClassID for this ForwardCharacterIterator ("poor man's
+ * RTTI").<P> Despite the fact that this function is public,
+ * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API!
+ * @Returns a UClassID for this ForwardCharacterIterator
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const = 0;
+
+ /**
+ * Gets the current code unit for returning and advances to the next code unit
+ * in the iteration range
+ * (toward endIndex()). If there are
+ * no more code units to return, returns DONE.
+ * @return the current code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar nextPostInc(void) = 0;
+
+ /**
+ * Gets the current code point for returning and advances to the next code point
+ * in the iteration range
+ * (toward endIndex()). If there are
+ * no more code points to return, returns DONE.
+ * @return the current code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 next32PostInc(void) = 0;
+
+ /**
+ * Returns FALSE if there are no more code units or code points
+ * at or after the current position in the iteration range.
+ * This is used with nextPostInc() or next32PostInc() in forward
+ * iteration.
+ * @returns FALSE if there are no more code units or code points
+ * at or after the current position in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UBool hasNext() = 0;
+
protected:
- /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/
- ForwardCharacterIterator() : UObject() {}
-
- /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/
- ForwardCharacterIterator(const ForwardCharacterIterator &other) : UObject(other) {}
-
- /**
- * Assignment operator to be overridden in the implementing class.
- * @stable ICU 2.0
- */
- ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; }
+ /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/
+ ForwardCharacterIterator() : UObject() {}
+
+ /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/
+ ForwardCharacterIterator(const ForwardCharacterIterator &other) : UObject(other) {}
+
+ /**
+ * Assignment operator to be overridden in the implementing class.
+ * @stable ICU 2.0
+ */
+ ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; }
};
/**
@@ -351,361 +351,361 @@
*/
class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
public:
- /**
- * Origin enumeration for the move() and move32() functions.
- * @stable ICU 2.0
- */
- enum EOrigin { kStart, kCurrent, kEnd };
+ /**
+ * Origin enumeration for the move() and move32() functions.
+ * @stable ICU 2.0
+ */
+ enum EOrigin { kStart, kCurrent, kEnd };
- /**
- * Returns a pointer to a new CharacterIterator of the same
- * concrete class as this one, and referring to the same
- * character in the same text-storage object as this one. The
- * caller is responsible for deleting the new clone.
- * @return a pointer to a new CharacterIterator
- * @stable ICU 2.0
- */
- virtual CharacterIterator* clone(void) const = 0;
+ /**
+ * Returns a pointer to a new CharacterIterator of the same
+ * concrete class as this one, and referring to the same
+ * character in the same text-storage object as this one. The
+ * caller is responsible for deleting the new clone.
+ * @return a pointer to a new CharacterIterator
+ * @stable ICU 2.0
+ */
+ virtual CharacterIterator* clone(void) const = 0;
- /**
- * Sets the iterator to refer to the first code unit in its
- * iteration range, and returns that code unit.
- * This can be used to begin an iteration with next().
- * @return the first code unit in its iteration range.
- * @stable ICU 2.0
- */
- virtual UChar first(void) = 0;
+ /**
+ * Sets the iterator to refer to the first code unit in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with next().
+ * @return the first code unit in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar first(void) = 0;
- /**
- * Sets the iterator to refer to the first code unit in its
- * iteration range, returns that code unit, and moves the position
- * to the second code unit. This is an alternative to setToStart()
- * for forward iteration with nextPostInc().
- * @return the first code unit in its iteration range.
- * @stable ICU 2.0
- */
- virtual UChar firstPostInc(void);
+ /**
+ * Sets the iterator to refer to the first code unit in its
+ * iteration range, returns that code unit, and moves the position
+ * to the second code unit. This is an alternative to setToStart()
+ * for forward iteration with nextPostInc().
+ * @return the first code unit in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar firstPostInc(void);
- /**
- * Sets the iterator to refer to the first code point in its
- * iteration range, and returns that code unit,
- * This can be used to begin an iteration with next32().
- * Note that an iteration with next32PostInc(), beginning with,
- * e.g., setToStart() or firstPostInc(), is more efficient.
- * @return the first code point in its iteration range.
- * @stable ICU 2.0
- */
- virtual UChar32 first32(void) = 0;
+ /**
+ * Sets the iterator to refer to the first code point in its
+ * iteration range, and returns that code point.
+ * This can be used to begin an iteration with next32().
+ * Note that an iteration with next32PostInc(), beginning with,
+ * e.g., setToStart() or firstPostInc(), is more efficient.
+ * @return the first code point in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 first32(void) = 0;
- /**
- * Sets the iterator to refer to the first code point in its
- * iteration range, returns that code point, and moves the position
- * to the second code point. This is an alternative to setToStart()
- * for forward iteration with next32PostInc().
- * @return the first code point in its iteration range.
- * @stable ICU 2.0
- */
- virtual UChar32 first32PostInc(void);
+ /**
+ * Sets the iterator to refer to the first code point in its
+ * iteration range, returns that code point, and moves the position
+ * to the second code point. This is an alternative to setToStart()
+ * for forward iteration with next32PostInc().
+ * @return the first code point in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 first32PostInc(void);
- /**
- * Sets the iterator to refer to the first code unit or code point in its
- * iteration range. This can be used to begin a forward
- * iteration with nextPostInc() or next32PostInc().
- * @return the start position of the iteration range
- * @stable ICU 2.0
- */
- inline int32_t setToStart();
+ /**
+ * Sets the iterator to refer to the first code unit or code point in its
+ * iteration range. This can be used to begin a forward
+ * iteration with nextPostInc() or next32PostInc().
+ * @return the start position of the iteration range
+ * @stable ICU 2.0
+ */
+ inline int32_t setToStart();
- /**
- * Sets the iterator to refer to the last code unit in its
- * iteration range, and returns that code unit.
- * This can be used to begin an iteration with previous().
- * @return the last code unit.
- * @stable ICU 2.0
- */
- virtual UChar last(void) = 0;
+ /**
+ * Sets the iterator to refer to the last code unit in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with previous().
+ * @return the last code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar last(void) = 0;
- /**
- * Sets the iterator to refer to the last code point in its
- * iteration range, and returns that code unit.
- * This can be used to begin an iteration with previous32().
- * @return the last code point.
- * @stable ICU 2.0
- */
- virtual UChar32 last32(void) = 0;
+ /**
+ * Sets the iterator to refer to the last code point in its
+ * iteration range, and returns that code point.
+ * This can be used to begin an iteration with previous32().
+ * @return the last code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 last32(void) = 0;
- /**
- * Sets the iterator to the end of its iteration range, just behind
- * the last code unit or code point. This can be used to begin a backward
- * iteration with previous() or previous32().
- * @return the end position of the iteration range
- * @stable ICU 2.0
- */
- inline int32_t setToEnd();
+ /**
+ * Sets the iterator to the end of its iteration range, just behind
+ * the last code unit or code point. This can be used to begin a backward
+ * iteration with previous() or previous32().
+ * @return the end position of the iteration range
+ * @stable ICU 2.0
+ */
+ inline int32_t setToEnd();
- /**
- * Sets the iterator to refer to the "position"-th code unit
- * in the text-storage object the iterator refers to, and
- * returns that code unit.
- * @param position the "position"-th code unit in the text-storage object
- * @return the "position"-th code unit.
- * @stable ICU 2.0
- */
- virtual UChar setIndex(int32_t position) = 0;
+ /**
+ * Sets the iterator to refer to the "position"-th code unit
+ * in the text-storage object the iterator refers to, and
+ * returns that code unit.
+ * @param position the "position"-th code unit in the text-storage object
+ * @return the "position"-th code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar setIndex(int32_t position) = 0;
- /**
- * Sets the iterator to refer to the beginning of the code point
- * that contains the "position"-th code unit
- * in the text-storage object the iterator refers to, and
- * returns that code point.
- * The current position is adjusted to the beginning of the code point
- * (its first code unit).
- * @param position the "position"-th code unit in the text-storage object
- * @return the "position"-th code point.
- * @stable ICU 2.0
- */
- virtual UChar32 setIndex32(int32_t position) = 0;
+ /**
+ * Sets the iterator to refer to the beginning of the code point
+ * that contains the "position"-th code unit
+ * in the text-storage object the iterator refers to, and
+ * returns that code point.
+ * The current position is adjusted to the beginning of the code point
+ * (its first code unit).
+ * @param position the "position"-th code unit in the text-storage object
+ * @return the "position"-th code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 setIndex32(int32_t position) = 0;
- /**
- * Returns the code unit the iterator currently refers to.
- * @return the current code unit.
- * @stable ICU 2.0
- */
- virtual UChar current(void) const = 0;
+ /**
+ * Returns the code unit the iterator currently refers to.
+ * @return the current code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar current(void) const = 0;
- /**
- * Returns the code point the iterator currently refers to.
- * @return the current code point.
- * @stable ICU 2.0
- */
- virtual UChar32 current32(void) const = 0;
+ /**
+ * Returns the code point the iterator currently refers to.
+ * @return the current code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 current32(void) const = 0;
- /**
- * Advances to the next code unit in the iteration range
- * (toward endIndex()), and returns that code unit. If there are
- * no more code units to return, returns DONE.
- * @return the next code unit.
- * @stable ICU 2.0
- */
- virtual UChar next(void) = 0;
+ /**
+ * Advances to the next code unit in the iteration range
+ * (toward endIndex()), and returns that code unit. If there are
+ * no more code units to return, returns DONE.
+ * @return the next code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar next(void) = 0;
- /**
- * Advances to the next code point in the iteration range
- * (toward endIndex()), and returns that code point. If there are
- * no more code points to return, returns DONE.
- * Note that iteration with "pre-increment" semantics is less
- * efficient than iteration with "post-increment" semantics
- * that is provided by next32PostInc().
- * @return the next code point.
- * @stable ICU 2.0
- */
- virtual UChar32 next32(void) = 0;
+ /**
+ * Advances to the next code point in the iteration range
+ * (toward endIndex()), and returns that code point. If there are
+ * no more code points to return, returns DONE.
+ * Note that iteration with "pre-increment" semantics is less
+ * efficient than iteration with "post-increment" semantics
+ * that is provided by next32PostInc().
+ * @return the next code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 next32(void) = 0;
- /**
- * Advances to the previous code unit in the iteration range
- * (toward startIndex()), and returns that code unit. If there are
- * no more code units to return, returns DONE.
- * @return the previous code unit.
- * @stable ICU 2.0
- */
- virtual UChar previous(void) = 0;
+ /**
+ * Advances to the previous code unit in the iteration range
+ * (toward startIndex()), and returns that code unit. If there are
+ * no more code units to return, returns DONE.
+ * @return the previous code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar previous(void) = 0;
- /**
- * Advances to the previous code point in the iteration range
- * (toward startIndex()), and returns that code point. If there are
- * no more code points to return, returns DONE.
- * @return the previous code point.
- * @stable ICU 2.0
- */
- virtual UChar32 previous32(void) = 0;
+ /**
+ * Advances to the previous code point in the iteration range
+ * (toward startIndex()), and returns that code point. If there are
+ * no more code points to return, returns DONE.
+ * @return the previous code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 previous32(void) = 0;
- /**
- * Returns FALSE if there are no more code units or code points
- * before the current position in the iteration range.
- * This is used with previous() or previous32() in backward
- * iteration.
- * @return FALSE if there are no more code units or code points
- * before the current position in the iteration range, return TRUE otherwise.
- * @stable ICU 2.0
- */
- virtual UBool hasPrevious() = 0;
+ /**
+ * Returns FALSE if there are no more code units or code points
+ * before the current position in the iteration range.
+ * This is used with previous() or previous32() in backward
+ * iteration.
+ * @return FALSE if there are no more code units or code points
+ * before the current position in the iteration range, return TRUE otherwise.
+ * @stable ICU 2.0
+ */
+ virtual UBool hasPrevious() = 0;
- /**
- * Returns the numeric index in the underlying text-storage
- * object of the character returned by first(). Since it's
- * possible to create an iterator that iterates across only
- * part of a text-storage object, this number isn't
- * necessarily 0.
- * @returns the numeric index in the underlying text-storage
- * object of the character returned by first().
- * @stable ICU 2.0
- */
- inline int32_t startIndex(void) const;
+ /**
+ * Returns the numeric index in the underlying text-storage
+ * object of the character returned by first(). Since it's
+ * possible to create an iterator that iterates across only
+ * part of a text-storage object, this number isn't
+ * necessarily 0.
+ * @returns the numeric index in the underlying text-storage
+ * object of the character returned by first().
+ * @stable ICU 2.0
+ */
+ inline int32_t startIndex(void) const;
- /**
- * Returns the numeric index in the underlying text-storage
- * object of the position immediately BEYOND the character
- * returned by last().
- * @return the numeric index in the underlying text-storage
- * object of the position immediately BEYOND the character
- * returned by last().
- * @stable ICU 2.0
- */
- inline int32_t endIndex(void) const;
+ /**
+ * Returns the numeric index in the underlying text-storage
+ * object of the position immediately BEYOND the character
+ * returned by last().
+ * @return the numeric index in the underlying text-storage
+ * object of the position immediately BEYOND the character
+ * returned by last().
+ * @stable ICU 2.0
+ */
+ inline int32_t endIndex(void) const;
- /**
- * Returns the numeric index in the underlying text-storage
- * object of the character the iterator currently refers to
- * (i.e., the character returned by current()).
- * @return the numberic index in the text-storage object of
- * the character the iterator currently refers to
- * @stable ICU 2.0
- */
- inline int32_t getIndex(void) const;
+ /**
+ * Returns the numeric index in the underlying text-storage
+ * object of the character the iterator currently refers to
+ * (i.e., the character returned by current()).
+ * @return the numeric index in the text-storage object of
+ * the character the iterator currently refers to
+ * @stable ICU 2.0
+ */
+ inline int32_t getIndex(void) const;
- /**
- * Returns the length of the entire text in the underlying
- * text-storage object.
- * @return the length of the entire text in the text-storage object
- * @stable ICU 2.0
- */
- inline int32_t getLength() const;
+ /**
+ * Returns the length of the entire text in the underlying
+ * text-storage object.
+ * @return the length of the entire text in the text-storage object
+ * @stable ICU 2.0
+ */
+ inline int32_t getLength() const;
- /**
- * Moves the current position relative to the start or end of the
- * iteration range, or relative to the current position itself.
- * The movement is expressed in numbers of code units forward
- * or backward by specifying a positive or negative delta.
- * @delta the position relative to origin. A positive delta means forward;
- * a negative delta means backward.
- * @origin Origin enumeration {kStart, kCurrent, kEnd}
- * @return the new position
- * @stable ICU 2.0
- */
- virtual int32_t move(int32_t delta, EOrigin origin) = 0;
+ /**
+ * Moves the current position relative to the start or end of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ * @param delta the position relative to origin. A positive delta means forward;
+ * a negative delta means backward.
+ * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+ * @return the new position
+ * @stable ICU 2.0
+ */
+ virtual int32_t move(int32_t delta, EOrigin origin) = 0;
- /**
- * Moves the current position relative to the start or end of the
- * iteration range, or relative to the current position itself.
- * The movement is expressed in numbers of code points forward
- * or backward by specifying a positive or negative delta.
- * @delta the position relative to origin. A positive delta means forward;
- * a negative delta means backward.
- * @origin Origin enumeration {kStart, kCurrent, kEnd}
- * @return the new position
- * @stable ICU 2.0
- */
- virtual int32_t move32(int32_t delta, EOrigin origin) = 0;
+ /**
+ * Moves the current position relative to the start or end of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code points forward
+ * or backward by specifying a positive or negative delta.
+ * @param delta the position relative to origin. A positive delta means forward;
+ * a negative delta means backward.
+ * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+ * @return the new position
+ * @stable ICU 2.0
+ */
+ virtual int32_t move32(int32_t delta, EOrigin origin) = 0;
- /**
- * Copies the text under iteration into the UnicodeString
- * referred to by "result".
- * @param result Receives a copy of the text under iteration.
- * @stable ICU 2.0
- */
- virtual void getText(UnicodeString& result) = 0;
+ /**
+ * Copies the text under iteration into the UnicodeString
+ * referred to by "result".
+ * @param result Receives a copy of the text under iteration.
+ * @stable ICU 2.0
+ */
+ virtual void getText(UnicodeString& result) = 0;
protected:
- /**
- * Empty constructor.
- * @stable ICU 2.0
- */
- CharacterIterator();
+ /**
+ * Empty constructor.
+ * @stable ICU 2.0
+ */
+ CharacterIterator();
- /**
- * Constructor, just setting the length field in this base class.
- * @stable ICU 2.0
- */
- CharacterIterator(int32_t length);
+ /**
+ * Constructor, just setting the length field in this base class.
+ * @stable ICU 2.0
+ */
+ CharacterIterator(int32_t length);
- /**
- * Constructor, just setting the length and position fields in this base class.
- * @stable ICU 2.0
- */
- CharacterIterator(int32_t length, int32_t position);
+ /**
+ * Constructor, just setting the length and position fields in this base class.
+ * @stable ICU 2.0
+ */
+ CharacterIterator(int32_t length, int32_t position);
- /**
- * Constructor, just setting the length, start, end, and position fields in this base class.
- * @stable ICU 2.0
- */
- CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);
+ /**
+ * Constructor, just setting the length, start, end, and position fields in this base class.
+ * @stable ICU 2.0
+ */
+ CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);
- /**
- * Copy constructor.
- *
- * @param that The CharacterIterator to be copied
- * @stable ICU 2.0
- */
- CharacterIterator(const CharacterIterator &that);
+ /**
+ * Copy constructor.
+ *
+ * @param that The CharacterIterator to be copied
+ * @stable ICU 2.0
+ */
+ CharacterIterator(const CharacterIterator &that);
- /**
- * Assignment operator. Sets this CharacterIterator to have the same behavior,
- * as the one passed in.
- * @param that The CharacterIterator passed in.
- * @return the newly set CharacterIterator.
- * @stable ICU 2.0
- */
- CharacterIterator &operator=(const CharacterIterator &that);
+ /**
+ * Assignment operator. Sets this CharacterIterator to have the same behavior,
+ * as the one passed in.
+ * @param that The CharacterIterator passed in.
+ * @return the newly set CharacterIterator.
+ * @stable ICU 2.0
+ */
+ CharacterIterator &operator=(const CharacterIterator &that);
- /**
- * Base class text length field.
- * Necessary this for correct getText() and hashCode().
- * @stable ICU 2.0
- */
- int32_t textLength;
+ /**
+ * Base class text length field.
+ * Necessary this for correct getText() and hashCode().
+ * @stable ICU 2.0
+ */
+ int32_t textLength;
- /**
- * Base class field for the current position.
- * @stable ICU 2.0
- */
- int32_t pos;
+ /**
+ * Base class field for the current position.
+ * @stable ICU 2.0
+ */
+ int32_t pos;
- /**
- * Base class field for the start of the iteration range.
- * @stable ICU 2.0
- */
- int32_t begin;
+ /**
+ * Base class field for the start of the iteration range.
+ * @stable ICU 2.0
+ */
+ int32_t begin;
- /**
- * Base class field for the end of the iteration range.
- * @stable ICU 2.0
- */
- int32_t end;
+ /**
+ * Base class field for the end of the iteration range.
+ * @stable ICU 2.0
+ */
+ int32_t end;
};
inline UBool
ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
- return !operator==(that);
+ return !operator==(that);
}
inline int32_t
CharacterIterator::setToStart() {
- return move(0, kStart);
+ return move(0, kStart);
}
inline int32_t
CharacterIterator::setToEnd() {
- return move(0, kEnd);
+ return move(0, kEnd);
}
inline int32_t
CharacterIterator::startIndex(void) const {
- return begin;
+ return begin;
}
inline int32_t
CharacterIterator::endIndex(void) const {
- return end;
+ return end;
}
inline int32_t
CharacterIterator::getIndex(void) const {
- return pos;
+ return pos;
}
inline int32_t
CharacterIterator::getLength(void) const {
- return textLength;
+ return textLength;
}
U_NAMESPACE_END
Index: dbbi.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/dbbi.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- dbbi.h 10 Sep 2003 02:42:04 -0000 1.1
+++ dbbi.h 6 Apr 2004 10:08:07 -0000 1.2
@@ -84,11 +84,6 @@
DictionaryBasedBreakIteratorTables *fTables;
- /**
- * Class ID
- */
- static const char fgClassID;
-
/**=======================================================================
* Create a dictionary based break boundary detection iterator.
* @param tablesImage The location for the dictionary to be loaded into memory
@@ -176,6 +171,19 @@
virtual int32_t preceding(int32_t offset);
/**
+ * Returns the class ID for this class. This is useful only for
+ * comparing to a return value from getDynamicClassID(). For example:
+ *
+ * Base* polymorphic_pointer = createPolymorphicObject();
+ * if (polymorphic_pointer->getDynamicClassID() ==
+ * Derived::getStaticClassID()) ...
+ *
+ * @return The class ID for all objects of this class.
+ * @stable ICU 2.0
+ */
+ static UClassID getStaticClassID(void);
+
+ /**
* Returns a unique class ID POLYMORPHICALLY. Pure virtual override.
* This method is to implement a simple version of RTTI, since not all
* C++ compilers support genuine RTTI. Polymorphic operator==() and
@@ -188,19 +196,6 @@
*/
virtual UClassID getDynamicClassID(void) const;
- /**
- * Returns the class ID for this class. This is useful only for
- * comparing to a return value from getDynamicClassID(). For example:
- *
- * Base* polymorphic_pointer = createPolymorphicObject();
- * if (polymorphic_pointer->getDynamicClassID() ==
- * Derived::getStaticClassID()) ...
- *
- * @return The class ID for all objects of this class.
- * @stable ICU 2.0
- */
- static inline UClassID getStaticClassID(void);
-
protected:
//=======================================================================
// implementation
@@ -269,14 +264,6 @@
friend class DictionaryBasedBreakIteratorTables;
friend class BreakIterator;
};
-
-inline UClassID
-DictionaryBasedBreakIterator::getStaticClassID(void)
-{ return (UClassID)(&fgClassID); }
-
-inline UClassID
-DictionaryBasedBreakIterator::getDynamicClassID(void) const
-{ return DictionaryBasedBreakIterator::getStaticClassID(); }
U_NAMESPACE_END
Index: locid.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/locid.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- locid.h 10 Sep 2003 02:42:04 -0000 1.4
+++ locid.h 6 Apr 2004 10:08:07 -0000 1.5
@@ -34,6 +34,7 @@
#include "unicode/unistr.h"
#include "unicode/putil.h"
#include "unicode/uloc.h"
+#include "unicode/strenum.h"
/**
* \file
@@ -250,6 +251,8 @@
* @param country Uppercase two-letter ISO-3166 code. (optional)
* @param variant Uppercase vendor and browser specific code. See class
* description. (optional)
+ * @param keywordsAndValues A string consisting of keyword/values pairs, such as
+ * "collation=phonebook;currency=euro"
*
* @see getDefault
* @see uloc_getDefault
@@ -257,7 +260,8 @@
*/
Locale( const char * language,
const char * country = 0,
- const char * variant = 0);
+ const char * variant = 0,
+ const char * keywordsAndValues = 0);
/**
* Initializes a Locale object from another Locale object.
@@ -303,6 +307,19 @@
UBool operator!=(const Locale& other) const;
/**
+ * Clone this object.
+ * Clones can be used concurrently in multiple threads.
+ * If an error occurs, then NULL is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @draft ICU 2.8
+ */
+ Locale *clone() const;
+
+ /**
* Common methods of getting the current default Locale. Used for the
* presentation: menus, dialogs, etc. Generally set once when your applet or
* application is initialized, then never reset. (If you do reset the
@@ -325,13 +342,14 @@
* setDefault() only changes ICU's default locale ID, <strong>not</strong>
* the default locale ID of the runtime environment.
*
- * @param newLocale Locale to set to.
+ * @param newLocale Locale to set to. If NULL, set to the value obtained
+ * from the runtime environment.
* @param success The error code.
* @system
* @stable ICU 2.0
*/
static void setDefault(const Locale& newLocale,
- UErrorCode& success);
+ UErrorCode& success);
/**
@@ -354,6 +372,15 @@
inline const char * getLanguage( ) const;
/**
+ * Returns the locale's ISO-15924 abbreviation script code.
+ * @return An alias to the code
+ * @see uscript_getShortName
+ * @see uscript_getCode
+ * @draft ICU 2.8
+ */
+ inline const char * getScript( ) const;
+
+ /**
* Returns the locale's ISO-3166 country code.
* @return An alias to the code
* @stable ICU 2.0
@@ -378,6 +405,37 @@
inline const char * getName() const;
/**
+ * Returns the programmatic name of the entire locale as getName would return,
+ * but without keywords.
+ * @return A pointer to "name".
+ * @see getName
+ * @draft ICU 2.8
+ */
+ const char * getBaseName() const;
+
+
+ /**
+ * Gets the list of keywords for the specified locale.
+ *
+ * @return pointer to StringEnumeration class. Client must dispose of it by calling delete.
+ * @param status Returns any error information while performing this operation.
+ * @draft ICU 2.8
+ */
+ StringEnumeration * createKeywords(UErrorCode &status) const;
+
+ /**
+ * Get the value for a keyword.
+ *
+ * @param keywordName name of the keyword for which we want the value. Case insensitive.
+ * @param status Returns any error information while performing this operation.
+ * @param buffer Receives the keyword value.
+ * @param bufLen The capacity of the receiving buffer.
+ * @return the length of the keyword value.
+ * @draft ICU 2.8
+ */
+ int32_t getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const;
+
+ /**
* returns the locale's three-letter language code, as specified
* in ISO draft standard ISO-639-2..
* @return An alias to the code, or NULL
@@ -414,21 +472,49 @@
/**
* Fills in "dispLang" with the name of this locale's language in a format suitable for
- * user display in the locale specified by "inLocale". For example, if the locale's
- * language code is "en" and inLocale's language code is "fr", this function would set
+ * user display in the locale specified by "displayLocale". For example, if the locale's
+ * language code is "en" and displayLocale's language code is "fr", this function would set
* dispLang to "Anglais".
- * @param inLocale Specifies the locale to be used to display the name. In other words,
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
* if the locale's language code is "en", passing Locale::getFrench() for
- * inLocale would result in "Anglais", while passing Locale::getGerman()
- * for inLocale would result in "Englisch".
+ * displayLocale would result in "Anglais", while passing Locale::getGerman()
+ * for displayLocale would result in "Englisch".
* @param dispLang Receives the language's display name.
* @return A reference to "dispLang".
* @stable ICU 2.0
*/
- UnicodeString& getDisplayLanguage( const Locale& inLocale,
+ UnicodeString& getDisplayLanguage( const Locale& displayLocale,
UnicodeString& dispLang) const;
/**
+ * Fills in "dispScript" with the name of this locale's script in a format suitable
+ * for user display in the default locale. For example, if the locale's script code
+ * is "LATN" and the default locale's language code is "en", this function would set
+ * dispScript to "Latin".
+ * @param dispScript Receives the script's display name.
+ * @return A reference to "dispScript".
+ * @draft ICU 2.8
+ */
+ UnicodeString& getDisplayScript( UnicodeString& dispScript) const;
+
+ /**
+ * Fills in "dispScript" with the name of this locale's script in a format suitable
+ * for user display in the locale specified by "displayLocale". For example, if the locale's
+ * script code is "LATN" and displayLocale's language code is "en", this function would set
+ * dispScript to "Latin".
+ * @param displayLocale Specifies the locale to be used to display the name. In other
+ * words, if the locale's script code is "LATN", passing
+ * Locale::getFrench() for displayLocale would result in "", while
+ * passing Locale::getGerman() for displayLocale would result in
+ * "".
+ * @param dispScript Receives the script's display name.
+ * @return A reference to "dispScript".
+ * @draft ICU 2.8
+ */
+ UnicodeString& getDisplayScript( const Locale& displayLocale,
+ UnicodeString& dispScript) const;
+
+ /**
* Fills in "dispCountry" with the name of this locale's country in a format suitable
* for user display in the default locale. For example, if the locale's country code
* is "FR" and the default locale's language code is "en", this function would set
@@ -441,19 +527,19 @@
/**
* Fills in "dispCountry" with the name of this locale's country in a format suitable
- * for user display in the locale specified by "inLocale". For example, if the locale's
- * country code is "US" and inLocale's language code is "fr", this function would set
+ * for user display in the locale specified by "displayLocale". For example, if the locale's
+ * country code is "US" and displayLocale's language code is "fr", this function would set
* dispCountry to "Etats-Unis".
- * @param inLocale Specifies the locale to be used to display the name. In other
+ * @param displayLocale Specifies the locale to be used to display the name. In other
* words, if the locale's country code is "US", passing
- * Locale::getFrench() for inLocale would result in "États-Unis", while
- * passing Locale::getGerman() for inLocale would result in
+ * Locale::getFrench() for displayLocale would result in "États-Unis", while
+ * passing Locale::getGerman() for displayLocale would result in
* "Vereinigte Staaten".
* @param dispCountry Receives the country's display name.
* @return A reference to "dispCountry".
* @stable ICU 2.0
*/
- UnicodeString& getDisplayCountry( const Locale& inLocale,
+ UnicodeString& getDisplayCountry( const Locale& displayLocale,
UnicodeString& dispCountry) const;
/**
@@ -467,13 +553,13 @@
/**
* Fills in "dispVar" with the name of this locale's variant code in a format
- * suitable for user display in the locale specified by "inLocale".
- * @param inLocale Specifies the locale to be used to display the name.
+ * suitable for user display in the locale specified by "displayLocale".
+ * @param displayLocale Specifies the locale to be used to display the name.
* @param dispVar Receives the variant's display name.
* @return A reference to "dispVar".
* @stable ICU 2.0
*/
- UnicodeString& getDisplayVariant( const Locale& inLocale,
+ UnicodeString& getDisplayVariant( const Locale& displayLocale,
UnicodeString& dispVar) const;
/**
@@ -491,17 +577,17 @@
/**
* Fills in "name" with the name of this locale in a format suitable for user display
- * in the locale specfied by "inLocale". This function uses getDisplayLanguage(),
+ * in the locale specified by "displayLocale". This function uses getDisplayLanguage(),
* getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display
- * name in the format "language (country[,variant])". For example, if inLocale is
+ * name in the format "language (country[,variant])". For example, if displayLocale is
* fr_FR, then en_US's display name would be "Anglais (États-Unis)", and no_NO_NY's
* display name would be "norvégien (Norvège,NY)".
- * @param inLocale Specifies the locale to be used to display the name.
+ * @param displayLocale Specifies the locale to be used to display the name.
* @param name Receives the locale's display name.
* @return A reference to "name".
* @stable ICU 2.0
*/
- UnicodeString& getDisplayName( const Locale& inLocale,
+ UnicodeString& getDisplayName( const Locale& displayLocale,
UnicodeString& name) const;
/**
@@ -558,18 +644,18 @@
static const char* const* getISOLanguages();
/**
- * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * ICU "poor man's RTTI", returns a UClassID for this class.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
- virtual inline UClassID getDynamicClassID() const;
+ static UClassID getStaticClassID();
/**
- * ICU "poor man's RTTI", returns a UClassID for this class.
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
- static inline UClassID getStaticClassID();
+ virtual UClassID getDynamicClassID() const;
protected: /* only protected for testing purposes. DO NOT USE. */
/**
@@ -603,32 +689,26 @@
static Locale *getLocaleCache(void);
char language[ULOC_LANG_CAPACITY];
+ char script[ULOC_SCRIPT_CAPACITY];
char country[ULOC_COUNTRY_CAPACITY];
int32_t variantBegin;
char* fullName;
char fullNameBuffer[ULOC_FULLNAME_CAPACITY];
+ // name without keywords
+ char* baseName;
+ char baseNameBuffer[ULOC_FULLNAME_CAPACITY];
UBool fIsBogus;
- /**
- * The address of this static class variable serves as this class's ID
- * for ICU "poor man's RTTI".
- */
- static const char fgClassID;
-
static const Locale &getLocale(int locid);
+ /**
+ * A friend to allow the default locale to be set by either the C or C++ API.
+ * @internal
+ */
friend void locale_set_default_internal(const char *);
};
-inline UClassID
-Locale::getStaticClassID()
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-Locale::getDynamicClassID() const
-{ return Locale::getStaticClassID(); }
-
inline UBool
Locale::operator!=(const Locale& other) const
{
@@ -648,6 +728,12 @@
}
inline const char *
+Locale::getScript() const
+{
+ return script;
+}
+
+inline const char *
Locale::getVariant() const
{
return &fullName[variantBegin];
@@ -667,3 +753,4 @@
U_NAMESPACE_END
#endif
+
Index: normlzr.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/normlzr.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- normlzr.h 10 Sep 2003 02:42:04 -0000 1.5
+++ normlzr.h 6 Apr 2004 10:08:07 -0000 1.6
@@ -300,7 +300,7 @@
* "mode" normalization form.
*
* @see quickCheck
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
static inline UBool
isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
@@ -420,7 +420,7 @@
* @see u_strCompare
* @see u_strCaseCompare
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
static inline int32_t
compare(const UnicodeString &s1, const UnicodeString &s2,
@@ -692,18 +692,18 @@
void getText(UnicodeString& result);
/**
- * ICU "poor man's RTTI", returns a UClassID for the actual class.
- * @return a UClassID for the actual class.
- * @draft ICU 2.2
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ * @returns a UClassID for this class.
+ * @stable ICU 2.2
*/
- virtual inline UClassID getDynamicClassID() const;
+ static UClassID getStaticClassID();
/**
- * ICU "poor man's RTTI", returns a UClassID for this class.
- * @returns a UClassID for this class.
- * @draft ICU 2.2
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * @return a UClassID for the actual class.
+ * @stable ICU 2.2
*/
- static inline UClassID getStaticClassID();
+ virtual UClassID getDynamicClassID() const;
private:
//-------------------------------------------------------------------------
@@ -739,24 +739,11 @@
UnicodeString buffer;
int32_t bufferPos;
- /**
- * The address of this static class variable serves as this class's ID
- * for ICU "poor man's RTTI".
- */
- static const char fgClassID;
};
//-------------------------------------------------------------------------
// Inline implementations
//-------------------------------------------------------------------------
-
-inline UClassID
-Normalizer::getStaticClassID()
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-Normalizer::getDynamicClassID() const
-{ return Normalizer::getStaticClassID(); }
inline UBool
Normalizer::operator!= (const Normalizer& other) const
Index: parsepos.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/parsepos.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- parsepos.h 10 Sep 2003 02:42:04 -0000 1.1
+++ parsepos.h 6 Apr 2004 10:08:07 -0000 1.2
@@ -46,8 +46,10 @@
* @stable ICU 2.0
*/
ParsePosition()
- : UObject()
- { this->index = 0; this->errorIndex = -1; }
+ : UObject(),
+ index(0),
+ errorIndex(-1)
+ {}
/**
* Create a new ParsePosition with the given initial index.
@@ -55,8 +57,10 @@
* @stable ICU 2.0
*/
ParsePosition(int32_t newIndex)
- : UObject()
- { this->index = newIndex; this->errorIndex = -1; }
+ : UObject(),
+ index(newIndex),
+ errorIndex(-1)
+ {}
/**
* Copy constructor
@@ -64,14 +68,16 @@
* @stable ICU 2.0
*/
ParsePosition(const ParsePosition& copy)
- : UObject(copy)
- { this->index = copy.index; this->errorIndex = copy.errorIndex; }
+ : UObject(copy),
+ index(copy.index),
+ errorIndex(copy.errorIndex)
+ {}
/**
* Destructor
* @stable ICU 2.0
*/
- ~ParsePosition() {}
+ virtual ~ParsePosition();
/**
* Assignment operator
@@ -94,6 +100,19 @@
UBool operator!=(const ParsePosition& that) const;
/**
+ * Clone this object.
+ * Clones can be used concurrently in multiple threads.
+ * If an error occurs, then NULL is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @draft ICU 2.8
+ */
+ ParsePosition *clone() const;
+
+ /**
* Retrieve the current parse position. On input to a parse method, this
* is the index of the character at which parsing will begin; on output, it
* is the index of the character following the last character parsed.
@@ -126,18 +145,18 @@
int32_t getErrorIndex(void) const;
/**
- * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * ICU "poor man's RTTI", returns a UClassID for this class.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
- virtual inline UClassID getDynamicClassID() const;
+ static UClassID getStaticClassID();
/**
- * ICU "poor man's RTTI", returns a UClassID for this class.
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
- static inline UClassID getStaticClassID();
+ virtual UClassID getDynamicClassID() const;
private:
/**
@@ -153,20 +172,7 @@
*/
int32_t errorIndex;
- /**
- * The address of this static class variable serves as this class's ID
- * for ICU "poor man's RTTI".
- */
- static const char fgClassID;
};
-
-inline UClassID
-ParsePosition::getStaticClassID()
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-ParsePosition::getDynamicClassID() const
-{ return ParsePosition::getStaticClassID(); }
inline ParsePosition&
ParsePosition::operator=(const ParsePosition& copy)
Index: platform.h.in
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/platform.h.in,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- platform.h.in 10 Sep 2003 02:42:04 -0000 1.5
+++ platform.h.in 6 Apr 2004 10:08:07 -0000 1.6
@@ -44,6 +44,14 @@
#define U_IOSTREAM_SOURCE @U_IOSTREAM_SOURCE@
#endif
+#ifndef U_DEBUG
+#define U_DEBUG @ENABLE_DEBUG@
+#endif
+
+#ifndef U_RELEASE
+#define U_RELEASE @ENABLE_RELEASE@
+#endif
+
/* Determines whether specific types are available */
#ifndef U_HAVE_INT8_T
#define U_HAVE_INT8_T @HAVE_INT8_T@
@@ -104,6 +112,11 @@
#define U_HAVE_PLACEMENT_NEW @U_HAVE_PLACEMENT_NEW@
#endif
+/* Determine whether to enable tracing. */
+#ifndef U_ENABLE_TRACING
+#define U_ENABLE_TRACING @U_ENABLE_TRACING@
+#endif
+
/* Define the library suffix in a C syntax. */
#define U_HAVE_LIB_SUFFIX @U_HAVE_LIB_SUFFIX@
#define U_LIB_SUFFIX_C_NAME @ICULIBSUFFIXCNAME@
@@ -264,4 +277,3 @@
/*===========================================================================*/
#define U_MAKE "@U_MAKE@"
-
Index: pos400.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/pos400.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- pos400.h 10 Sep 2003 02:42:04 -0000 1.3
+++ pos400.h 6 Apr 2004 10:08:07 -0000 1.4
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1997-2001, International Business Machines
+* Copyright (C) 1997-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -66,6 +66,11 @@
setting in umachine.h which is for all platforms. */
#ifndef U_OVERRIDE_CXX_ALLOCATION
#define U_OVERRIDE_CXX_ALLOCATION 1
+#endif
+
+/* Determine whether to enable tracing. */
+#ifndef U_ENABLE_TRACING
+#define U_ENABLE_TRACING 1
#endif
/*===========================================================================*/
Index: putil.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/putil.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- putil.h 10 Sep 2003 02:42:04 -0000 1.4
+++ putil.h 6 Apr 2004 10:08:07 -0000 1.5
@@ -238,7 +238,6 @@
/**
* Get UTC (GMT) time measured in seconds since 0:00 on 1/1/70.
* @return the UTC time measured in seconds
- * @stable ICU 2.0
* @internal
*/
U_CAPI int32_t U_EXPORT2 uprv_getUTCtime(void);
@@ -314,18 +313,24 @@
*/
#ifdef XP_MAC
# define U_FILE_SEP_CHAR ':'
+# define U_FILE_ALT_SEP_CHAR ':'
# define U_PATH_SEP_CHAR ';'
# define U_FILE_SEP_STRING ":"
+# define U_FILE_ALT_SEP_STRING ":"
# define U_PATH_SEP_STRING ";"
#elif defined(WIN32) || defined(OS2)
# define U_FILE_SEP_CHAR '\\'
+# define U_FILE_ALT_SEP_CHAR '/'
# define U_PATH_SEP_CHAR ';'
# define U_FILE_SEP_STRING "\\"
+# define U_FILE_ALT_SEP_STRING "/"
# define U_PATH_SEP_STRING ";"
#else
# define U_FILE_SEP_CHAR '/'
+# define U_FILE_ALT_SEP_CHAR '/'
# define U_PATH_SEP_CHAR ':'
# define U_FILE_SEP_STRING "/"
+# define U_FILE_ALT_SEP_STRING "/"
# define U_PATH_SEP_STRING ":"
#endif
@@ -369,9 +374,35 @@
u_UCharsToChars(const UChar *us, char *cs, int32_t length);
/**
+ * Check if a char string only contains invariant characters.
+ * See utypes.h for details.
+ *
+ * @param s Input string pointer.
+ * @param length Length of the string, can be -1 if NUL-terminated.
+ * @return TRUE if s contains only invariant characters.
+ *
+ * @internal (ICU 2.8)
+ */
+U_CAPI UBool U_EXPORT2
+uprv_isInvariantString(const char *s, int32_t length);
+
+/**
+ * Check if a Unicode string only contains invariant characters.
+ * See utypes.h for details.
+ *
+ * @param s Input string pointer.
+ * @param length Length of the string, can be -1 if NUL-terminated.
+ * @return TRUE if s contains only invariant characters.
+ *
+ * @internal (ICU 2.8)
+ */
+U_CAPI UBool U_EXPORT2
+uprv_isInvariantUString(const UChar *s, int32_t length);
+
+/**
* \def U_UPPER_ORDINAL
* Get the ordinal number of an uppercase invariant character
- * @stable ICU 2.4
+ * @internal
*/
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
# define U_UPPER_ORDINAL(x) ((x)-'A')
@@ -415,7 +446,7 @@
*/
# define U_MAX_PTR(base) ((void *)(((char *)base)-((int32_t)(base))+((int32_t)0xffefff)))
# else
-# define U_MAX_PTR(base) ((void *)(((char *)(base)+0x7fffffff) > (char *)(base) ? ((char *)(base)+0x7fffffff) : (char *)-1))
+# define U_MAX_PTR(base) ((void *)(((char *)(base)+0x7fffffffu) > (char *)(base) ? ((char *)(base)+0x7fffffffu) : (char *)-1))
# endif
#endif
Index: pwin32.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/pwin32.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- pwin32.h 10 Sep 2003 02:42:04 -0000 1.5
+++ pwin32.h 6 Apr 2004 10:08:07 -0000 1.6
@@ -42,6 +42,22 @@
#define U_IOSTREAM_SOURCE 199711
#endif
+#ifndef U_DEBUG
+#ifdef _DEBUG
+#define U_DEBUG 1
+#else
+#define U_DEBUG 0
+#endif
+#endif
+
+#ifndef U_RELEASE
+#ifdef NDEBUG
+#define U_RELEASE 1
+#else
+#define U_RELEASE 0
+#endif
+#endif
+
/* Determines whether specific types are available */
#define U_HAVE_INT8_T 0
#define U_HAVE_UINT8_T 0
@@ -52,6 +68,10 @@
#define U_HAVE_INT64_T 0
#define U_HAVE_UINT64_T 0
+/* Define 64 bit limits */
+#define INT64_C(x) x
+#define UINT64_C(x) x
+
/* Define whether namespace is supported */
#define U_HAVE_NAMESPACE 1
@@ -65,6 +85,11 @@
/* Determine whether to override placement new and delete for STL. */
#ifndef U_HAVE_PLACEMENT_NEW
#define U_HAVE_PLACEMENT_NEW 1
+#endif
+
+/* Determine whether to enable tracing. */
+#ifndef U_ENABLE_TRACING
+#define U_ENABLE_TRACING 1
#endif
/*===========================================================================*/
Index: rbbi.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/rbbi.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- rbbi.h 10 Sep 2003 02:42:04 -0000 1.1
+++ rbbi.h 6 Apr 2004 10:08:07 -0000 1.2
@@ -30,6 +30,7 @@
class RuleBasedBreakIteratorTables;
class BreakIterator;
class RBBIDataWrapper;
+struct RBBIStateTable;
@@ -61,8 +62,6 @@
* @internal
*/
RBBIDataWrapper *fData;
- /** @internal */
- UTrie *fCharMappings;
/** Rule {tag} value for the most recent match.
* @internal
@@ -93,27 +92,12 @@
static UBool fTrace;
-
-private:
- /**
- * Class ID
- */
- static const char fgClassID;
-
protected:
//=======================================================================
// constructors
//=======================================================================
/**
- * This constructor uses the udata interface to create a BreakIterator
- * whose internal tables live in a memory-mapped file. "image" is a pointer
- * to the beginning of that file.
- * @internal
- */
- RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
-
- /**
* Constructor from a flattened set of RBBI data in malloced memory.
* RulesBasedBreakIterators built from a custom set of rules
* are created via this constructor; the rules are compiled
@@ -134,7 +118,7 @@
/** Default constructor. Creates an empty shell of an iterator, with no
* rules or text to iterate over. Object can subsequently be assigned to.
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
RuleBasedBreakIterator();
@@ -152,11 +136,27 @@
* @param parseError In the event of a syntax error in the rules, provides the location
* within the rules of the problem.
* @param status Information on any errors encountered.
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
RuleBasedBreakIterator( const UnicodeString &rules,
UParseError &parseError,
UErrorCode &status);
+
+
+ /**
+ * This constructor uses the udata interface to create a BreakIterator
+ * whose internal tables live in a memory-mapped file. "image" is an
+ * ICU UDataMemory handle for the pre-compiled break iterator tables.
+ * @param image handle to the memory image for the break iterator data.
+ * Ownership of the UDataMemory handle passes to the Break Iterator,
+ * which will be responsible for closing it when it is no longer needed.
+ * @param status Information on any errors encountered.
+ * @see udata_open
+ * @see #getBinaryRules
+ * @draft ICU 2.8
+ */
+ RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
+
/**
* Destructor
* @stable ICU 2.0
@@ -345,7 +345,7 @@
* returned break position.
*
* @see UWordBreak
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
virtual int32_t getRuleStatus() const;
@@ -360,7 +360,7 @@
* other classes have different class IDs.
* @stable ICU 2.0
*/
- inline virtual UClassID getDynamicClassID(void) const;
+ virtual UClassID getDynamicClassID(void) const;
/**
* Returns the class ID for this class. This is useful only for
@@ -373,7 +373,7 @@
* @return The class ID for all objects of this class.
* @stable ICU 2.0
*/
- inline static UClassID getStaticClassID(void);
+ static UClassID getStaticClassID(void);
/*
* Create a clone (copy) of this break iterator in memory provided
@@ -411,7 +411,7 @@
* is much faster than building one from the source form of the
* break rules.
*
- * The binary data is can only be used with the same version of ICU
+ * The binary data can only be used with the same version of ICU
* and on the same platform type (processor endian-ness)
*
* @param length Returns the length of the binary data. (Out paramter.)
@@ -473,24 +473,39 @@
*/
void init();
+private:
+
+ /**
+ * This method backs the iterator back up to a "safe position" in the text.
+ * This is a position that we know, without any context, must be a break position.
+ * The various calling methods then iterate forward from this safe position to
+ * the appropriate position to return. (For more information, see the description
+ * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
+ * @param statetable state table used for moving backwards
+ * @internal
+ */
+ int32_t handlePrevious(const RBBIStateTable *statetable);
+
+ /**
+ * This method is the actual implementation of the next() method. All iteration
+ * vectors through here. This method initializes the state machine to state 1
+ * and advances through the text character by character until we reach the end
+ * of the text or the state machine transitions to state 0. We update our return
+ * value every time the state machine passes through a possible end state.
+ * @param statetable state table used for moving forwards
+ * @internal
+ */
+ int32_t handleNext(const RBBIStateTable *statetable);
};
-//----------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
//
// Inline Functions Definitions ...
//
-//----------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
return !operator==(that);
-}
-
-inline UClassID RuleBasedBreakIterator::getStaticClassID(void) {
- return (UClassID)(&fgClassID);
-}
-
-inline UClassID RuleBasedBreakIterator::getDynamicClassID(void) const {
- return RuleBasedBreakIterator::getStaticClassID();
}
U_NAMESPACE_END
Index: rep.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/rep.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- rep.h 10 Sep 2003 02:42:04 -0000 1.4
+++ rep.h 6 Apr 2004 10:08:07 -0000 1.5
@@ -14,7 +14,6 @@
#ifndef REP_H
#define REP_H
-#include "unicode/utypes.h"
#include "unicode/uobject.h"
U_NAMESPACE_BEGIN
@@ -234,8 +233,6 @@
};
inline Replaceable::Replaceable() {}
-
-inline Replaceable::~Replaceable() {}
inline int32_t
Replaceable::length() const {
Index: resbund.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/resbund.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- resbund.h 10 Sep 2003 02:42:04 -0000 1.4
+++ resbund.h 6 Apr 2004 10:08:07 -0000 1.5
@@ -169,8 +169,24 @@
~ResourceBundle();
/**
+ * Clone this object.
+ * Clones can be used concurrently in multiple threads.
+ * If an error occurs, then NULL is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @draft ICU 2.8
+ */
+ ResourceBundle *clone() const;
+
+ /**
* Returns the size of a resource. Size for scalar types is always 1, and for vector/table types is
* the number of child resources.
+ * @warning Integer array is treated as a scalar type. There are no
+ * APIs to access individual members of an integer array. It
+ * is always returned as a whole.
*
* @return number of resources in a given resource.
* @stable ICU 2.0
@@ -387,7 +403,7 @@
* @return A version number string as specified in the resource bundle or its parent.
* The caller does not own this string.
* @see getVersion
- * @stable ICU 2.0
+ * @deprecated ICU 2.8 Use getVersion instead.
*/
const char*
getVersionNumber(void) const;
@@ -406,24 +422,37 @@
* Return the Locale associated with this ResourceBundle.
*
* @return a Locale object
- * @stable ICU 2.0
+ * @deprecated ICU 2.8 Use getLocale(ULocDataLocaleType type, UErrorCode &status) overload instead.
*/
const Locale&
getLocale(void) const;
/**
+ * Return the Locale associated with this ResourceBundle.
+ * @param type You can choose between requested, valid and actual
+ * locale. For description see the definition of
+ * ULocDataLocaleType in uloc.h
+ * @param status just for catching illegal arguments
+ *
+ * @return a Locale object
+ * @draft ICU 2.8
+ */
+ const Locale
+ getLocale(ULocDataLocaleType type, UErrorCode &status) const;
+
+ /**
* ICU "poor man's RTTI", returns a UClassID for the actual class.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
- virtual inline UClassID getDynamicClassID() const;
+ virtual UClassID getDynamicClassID() const;
/**
* ICU "poor man's RTTI", returns a UClassID for this class.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
- static inline UClassID getStaticClassID();
+ static UClassID getStaticClassID();
private:
ResourceBundle(); // default constructor not implemented
@@ -432,20 +461,7 @@
void constructForLocale(const UnicodeString& path, const Locale& locale, UErrorCode& error);
Locale *locName;
- /**
- * The address of this static class variable serves as this class's ID
- * for ICU "poor man's RTTI".
- */
- static const char fgClassID;
};
-
-inline UClassID
-ResourceBundle::getStaticClassID()
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-ResourceBundle::getDynamicClassID() const
-{ return ResourceBundle::getStaticClassID(); }
U_NAMESPACE_END
#endif
Index: schriter.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/schriter.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- schriter.h 10 Sep 2003 02:42:04 -0000 1.4
+++ schriter.h 6 Apr 2004 10:08:07 -0000 1.5
@@ -153,7 +153,7 @@
* @return a class ID for this class
* @stable ICU 2.0
*/
- static inline UClassID getStaticClassID(void);
+ static UClassID getStaticClassID(void);
protected:
/**
@@ -176,17 +176,7 @@
*/
UnicodeString text;
-private:
- static const char fgClassID;
};
-
-inline UClassID
-StringCharacterIterator::getStaticClassID(void)
-{ return (UClassID)(&fgClassID); }
-
-inline UClassID
-StringCharacterIterator::getDynamicClassID(void) const
-{ return StringCharacterIterator::getStaticClassID(); }
U_NAMESPACE_END
#endif
Index: strenum.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/strenum.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- strenum.h 10 Sep 2003 02:42:04 -0000 1.1
+++ strenum.h 6 Apr 2004 10:08:07 -0000 1.2
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2002, International Business Machines
+* Copyright (C) 2002-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -11,11 +11,10 @@
#define STRENUM_H
#include "unicode/uobject.h"
+#include "unicode/unistr.h"
U_NAMESPACE_BEGIN
-class UnicodeString;
-
/**
* Base class for 'pure' C++ implementations of uenum api. Adds a
* method that returns the next UnicodeString since in C++ this can
@@ -43,120 +42,206 @@
* upon any subsequent call to the enumeration's destructor, next,
* unext, snext, or reset.</p>
*
+ * ICU 2.8 adds some default implementations and helper functions
+ * for subclasses.
+ *
* @draft ICU 2.4
*/
class U_COMMON_API StringEnumeration : public UObject {
- public:
- /**
- * Destructor.
- * @draft ICU 2.4
- */
- virtual ~StringEnumeration();
+public:
+ /**
+ * Destructor.
+ * @draft ICU 2.4
+ */
+ virtual ~StringEnumeration();
- /**
- * <p>Return the number of elements that the iterator traverses. If
- * the iterator is out of sync with its service, status is set to
- * U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p>
- *
- * <p>The return value will not change except possibly as a result of
- * a subsequent call to reset, or if the iterator becomes out of sync.</p>
- *
- * <p>This is a convenience function. It can end up being very
- * expensive as all the items might have to be pre-fetched
- * (depending on the storage format of the data being
- * traversed).</p>
- *
- * @param status the error code.
- * @return number of elements in the iterator.
- *
- * @draft ICU 2.4 */
- virtual int32_t count(UErrorCode& status) const = 0;
+ /**
+ * Clone this object, an instance of a subclass of StringEnumeration.
+ * Clones can be used concurrently in multiple threads.
+ * If a subclass does not implement clone(), or if an error occurs,
+ * then NULL is returned.
+ * The clone functions in all subclasses return a base class pointer
+ * because some compilers do not support covariant (same-as-this)
+ * return types; cast to the appropriate subclass if necessary.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @draft ICU 2.8
+ */
+ virtual StringEnumeration *clone() const;
- /**
- * <p>Returns the next element as a NUL-terminated char*. If there
- * are no more elements, returns NULL. If the resultLength pointer
- * is not NULL, the length of the string (not counting the
- * terminating NUL) is returned at that address. If an error
- * status is returned, the value at resultLength is undefined.</p>
- *
- * <p>The returned pointer is owned by this iterator and must not be
- * deleted by the caller. The pointer is valid until the next call
- * to next, unext, snext, reset, or the enumerator's destructor.</p>
- *
- * <p>If the iterator is out of sync with its service, status is set
- * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
- *
- * <p>If the native service string is a UChar* string, it is
- * converted to char* with the invariant converter. If the
- * conversion fails (because a character cannot be converted) then
- * status is set to U_INVARIANT_CONVERSION_ERROR and the return
- * value is undefined (though not NULL).</p>
- *
- * @param status the error code.
- * @param resultLength a pointer to receive the length, can be NULL.
- * @return a pointer to the string, or NULL.
- *
- * @draft ICU 2.4
- */
- virtual const char* next(int32_t *resultLength, UErrorCode& status) = 0;
+ /**
+ * <p>Return the number of elements that the iterator traverses. If
+ * the iterator is out of sync with its service, status is set to
+ * U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p>
+ *
+ * <p>The return value will not change except possibly as a result of
+ * a subsequent call to reset, or if the iterator becomes out of sync.</p>
+ *
+ * <p>This is a convenience function. It can end up being very
+ * expensive as all the items might have to be pre-fetched
+ * (depending on the storage format of the data being
+ * traversed).</p>
+ *
+ * @param status the error code.
+ * @return number of elements in the iterator.
+ *
+ * @draft ICU 2.4 */
+ virtual int32_t count(UErrorCode& status) const = 0;
- /**
- * <p>Returns the next element as a NUL-terminated UChar*. If there
- * are no more elements, returns NULL. If the resultLength pointer
- * is not NULL, the length of the string (not counting the
- * terminating NUL) is returned at that address. If an error
- * status is returned, the value at resultLength is undefined.</p>
- *
- * <p>The returned pointer is owned by this iterator and must not be
- * deleted by the caller. The pointer is valid until the next call
- * to next, unext, snext, reset, or the enumerator's destructor.</p>
- *
- * <p>If the iterator is out of sync with its service, status is set
- * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
- *
- * @param status the error code.
- * @param resultLength a ponter to receive the length, can be NULL.
- * @return a pointer to the string, or NULL.
- *
- * @draft ICU 2.4
- */
- virtual const UChar* unext(int32_t *resultLength, UErrorCode& status) = 0;
+ /**
+ * <p>Returns the next element as a NUL-terminated char*. If there
+ * are no more elements, returns NULL. If the resultLength pointer
+ * is not NULL, the length of the string (not counting the
+ * terminating NUL) is returned at that address. If an error
+ * status is returned, the value at resultLength is undefined.</p>
+ *
+ * <p>The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor.</p>
+ *
+ * <p>If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+ *
+ * <p>If the native service string is a UChar* string, it is
+ * converted to char* with the invariant converter. If the
+ * conversion fails (because a character cannot be converted) then
+ * status is set to U_INVARIANT_CONVERSION_ERROR and the return
+ * value is undefined (though not NULL).</p>
+ *
+ * Starting with ICU 2.8, the default implementation calls snext()
+ * and handles the conversion.
+ *
+ * @param status the error code.
+ * @param resultLength a pointer to receive the length, can be NULL.
+ * @return a pointer to the string, or NULL.
+ *
+ * @draft ICU 2.4
+ */
+ virtual const char* next(int32_t *resultLength, UErrorCode& status);
- /**
- * <p>Returns the next element a UnicodeString*. If there are no
- * more elements, returns NULL.</p>
- *
- * <p>The returned pointer is owned by this iterator and must not be
- * deleted by the caller. The pointer is valid until the next call
- * to next, unext, snext, reset, or the enumerator's destructor.</p>
- *
- * <p>If the iterator is out of sync with its service, status is set
- * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
- *
- * @param status the error code.
- * @return a pointer to the string, or NULL.
- *
- * @draft ICU 2.4
- */
- virtual const UnicodeString* snext(UErrorCode& status) = 0;
+ /**
+ * <p>Returns the next element as a NUL-terminated UChar*. If there
+ * are no more elements, returns NULL. If the resultLength pointer
+ * is not NULL, the length of the string (not counting the
+ * terminating NUL) is returned at that address. If an error
+ * status is returned, the value at resultLength is undefined.</p>
+ *
+ * <p>The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor.</p>
+ *
+ * <p>If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+ *
+ * Starting with ICU 2.8, the default implementation calls snext()
+ * and handles the conversion.
+ *
+ * @param status the error code.
+ * @param resultLength a pointer to receive the length, can be NULL.
+ * @return a pointer to the string, or NULL.
+ *
+ * @draft ICU 2.4
+ */
+ virtual const UChar* unext(int32_t *resultLength, UErrorCode& status);
- /**
- * <p>Resets the iterator. This re-establishes sync with the
- * service and rewinds the iterator to start at the first
- * element.</p>
- *
- * <p>Previous pointers returned by next, unext, or snext become
- * invalid, and the value returned by count might change.</p>
- *
- * @param status the error code.
- *
- * @draft ICU 2.4
- */
- virtual void reset(UErrorCode& status) = 0;
-};
+ /**
+ * <p>Returns the next element as a UnicodeString*. If there are no
+ * more elements, returns NULL.</p>
+ *
+ * <p>The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor.</p>
+ *
+ * <p>If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+ *
+ * @param status the error code.
+ * @return a pointer to the string, or NULL.
+ *
+ * @draft ICU 2.4
+ */
+ virtual const UnicodeString* snext(UErrorCode& status) = 0;
-inline StringEnumeration::~StringEnumeration() {
-}
+ /**
+ * <p>Resets the iterator. This re-establishes sync with the
+ * service and rewinds the iterator to start at the first
+ * element.</p>
+ *
+ * <p>Previous pointers returned by next, unext, or snext become
+ * invalid, and the value returned by count might change.</p>
+ *
+ * @param status the error code.
+ *
+ * @draft ICU 2.4
+ */
+ virtual void reset(UErrorCode& status) = 0;
+
+protected:
+ /**
+ * UnicodeString field for use with default implementations and subclasses.
+ * @draft ICU 2.8
+ */
+ UnicodeString unistr;
+ /**
+ * char * default buffer for use with default implementations and subclasses.
+ * @draft ICU 2.8
+ */
+ char charsBuffer[32];
+ /**
+ * char * buffer for use with default implementations and subclasses.
+ * Allocated in constructor and in ensureCharsCapacity().
+ * @draft ICU 2.8
+ */
+ char *chars;
+ /**
+ * Capacity of chars, for use with default implementations and subclasses.
+ * @draft ICU 2.8
+ */
+ int32_t charsCapacity;
+
+ /**
+ * Default constructor for use with default implementations and subclasses.
+ * @draft ICU 2.8
+ */
+ StringEnumeration();
+
+ /**
+ * Ensures that chars is at least as large as the requested capacity.
+ * For use with default implementations and subclasses.
+ *
+ * @param capacity Requested capacity.
+ * @param status ICU in/out error code.
+ * @draft ICU 2.8
+ */
+ void ensureCharsCapacity(int32_t capacity, UErrorCode &status);
+
+ /**
+ * Converts s to Unicode and sets unistr to the result.
+ * For use with default implementations and subclasses,
+ * especially for implementations of snext() in terms of next().
+ * This is provided with a helper function instead of a default implementation
+ * of snext() to avoid potential infinite loops between next() and snext().
+ *
+ * For example:
+ * \code
+ * const UnicodeString* snext(UErrorCode& status) {
+ * int32_t resultLength=0;
+ * const char *s=next(&resultLength, status);
+ * return setChars(s, resultLength, status);
+ * }
+ * \endcode
+ *
+ * @param s String to be converted to Unicode.
+ * @param length Length of the string.
+ * @param status ICU in/out error code.
+ * @return A pointer to unistr.
+ * @draft ICU 2.8
+ */
+ UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status);
+};
U_NAMESPACE_END
Index: ubrk.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/ubrk.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- ubrk.h 10 Sep 2003 02:42:04 -0000 1.1
+++ ubrk.h 6 Apr 2004 10:08:07 -0000 1.2
@@ -7,6 +7,7 @@
#define UBRK_H
#include "unicode/utypes.h"
+#include "unicode/uloc.h"
/**
* A text-break iterator.
@@ -196,8 +197,9 @@
* Title Case breaks
* The iterator created using this type locates title boundaries as described for
* Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
- * please use Word Boundary iterator. @draft ICU 2.2
+ * please use Word Boundary iterator.
*
+ * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
*/
UBRK_TITLE
} UBreakIteratorType;
@@ -214,7 +216,7 @@
* word, to allow for further subdivisions of a category in future releases.
* Applications should check for tag values falling within the range, rather
* than for single individual values.
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
typedef enum UWordBreak {
/** Tag value for "words" that do not fit into any of other categories.
@@ -241,6 +243,54 @@
UBRK_WORD_IDEO_LIMIT = 500
} UWordBreak;
+/**
+ * Enum constants for the line break tags returned by getRuleStatus().
+ * A range of values is defined for each category of
+ * line break, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ * @draft ICU 2.8
+*/
+typedef enum ULineBreakTag {
+ /** Tag value for soft line breaks, positions at which a line break
+ * is acceptable but not required */
+ UBRK_LINE_SOFT = 0,
+ /** Upper bound for soft line breaks. */
+ UBRK_LINE_SOFT_LIMIT = 100,
+ /** Tag value for a hard, or mandatory line break */
+ UBRK_LINE_HARD = 100,
+ /** Upper bound for hard line breaks. */
+ UBRK_LINE_HARD_LIMIT = 200
+} ULineBreakTag;
+
+
+
+/**
+ * Enum constants for the sentence break tags returned by getRuleStatus().
+ * A range of values is defined for each category of
+ * sentence, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ * @draft ICU 2.8
+*/
+typedef enum USentenceBreakTag {
+ /** Tag value for sentences ending with a sentence terminator
+ * ('.', '?', '!', etc.) character, possibly followed by a
+ * hard separator (CR, LF, PS, etc.)
+ */
+ UBRK_SENTENCE_TERM = 0,
+ /** Upper bound for tags for sentences ended by sentence terminators. */
+ UBRK_SENTENCE_TERM_LIMIT = 100,
+ /** Tag value for sentences that do not contain an ending
+ * sentence terminator ('.', '?', '!', etc.) character, but
+ * are ended only by a hard separator (CR, LF, PS, etc.) or end of input.
+ */
+ UBRK_SENTENCE_SEP = 100,
+ /** Upper bound for tags for sentences ended by a separator. */
+ UBRK_SENTENCE_SEP_LIMIT = 200
+ /** Tag value for a hard, or mandatory line break */
+} USentenceBreakTag;
+
/**
* Open a new UBreakIterator for locating text boundaries for a specified locale.
@@ -276,7 +326,7 @@
* @param status A UErrorCode to receive any errors.
* @return A UBreakIterator for the specified rules.
* @see ubrk_open
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI UBreakIterator* U_EXPORT2
ubrk_openRules(const UChar *rules,
@@ -466,10 +516,23 @@
* status, a default value of 0 is returned.
* <p>
* For word break iterators, the possible values are defined in enum UWordBreak.
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
ubrk_getRuleStatus(UBreakIterator *bi);
+
+/**
+ * Return the locale of the break iterator. You can choose between the valid and
+ * the actual locale.
+ * @param bi break iterator
+ * @param type locale type (valid or actual)
+ * @param status error code
+ * @return locale string
+ * @draft ICU 2.8 likely to change in ICU 3.0, based on feedback
+ */
+U_CAPI const char* U_EXPORT2
+ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
+
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
Index: uchar.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uchar.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- uchar.h 10 Sep 2003 02:42:04 -0000 1.5
+++ uchar.h 6 Apr 2004 10:08:07 -0000 1.6
@@ -290,45 +290,45 @@
UCHAR_BINARY_LIMIT,
/** Enumerated property Bidi_Class.
- Same as u_charDirection, returns UCharDirection values. @draft ICU 2.2 */
+ Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
UCHAR_BIDI_CLASS=0x1000,
- /** First constant for enumerated/integer Unicode properties. @draft ICU 2.2 */
+ /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
UCHAR_INT_START=UCHAR_BIDI_CLASS,
/** Enumerated property Block.
- Same as ublock_getCode, returns UBlockCode values. @draft ICU 2.2 */
+ Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */
UCHAR_BLOCK,
/** Enumerated property Canonical_Combining_Class.
- Same as u_getCombiningClass, returns 8-bit numeric values. @draft ICU 2.2 */
+ Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */
UCHAR_CANONICAL_COMBINING_CLASS,
/** Enumerated property Decomposition_Type.
- Returns UDecompositionType values. @draft ICU 2.2 */
+ Returns UDecompositionType values. @stable ICU 2.2 */
UCHAR_DECOMPOSITION_TYPE,
/** Enumerated property East_Asian_Width.
See http://www.unicode.org/reports/tr11/
- Returns UEastAsianWidth values. @draft ICU 2.2 */
+ Returns UEastAsianWidth values. @stable ICU 2.2 */
UCHAR_EAST_ASIAN_WIDTH,
/** Enumerated property General_Category.
- Same as u_charType, returns UCharCategory values. @draft ICU 2.2 */
+ Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */
UCHAR_GENERAL_CATEGORY,
/** Enumerated property Joining_Group.
- Returns UJoiningGroup values. @draft ICU 2.2 */
+ Returns UJoiningGroup values. @stable ICU 2.2 */
UCHAR_JOINING_GROUP,
/** Enumerated property Joining_Type.
- Returns UJoiningType values. @draft ICU 2.2 */
+ Returns UJoiningType values. @stable ICU 2.2 */
UCHAR_JOINING_TYPE,
/** Enumerated property Line_Break.
- Returns ULineBreak values. @draft ICU 2.2 */
+ Returns ULineBreak values. @stable ICU 2.2 */
UCHAR_LINE_BREAK,
/** Enumerated property Numeric_Type.
- Returns UNumericType values. @draft ICU 2.2 */
+ Returns UNumericType values. @stable ICU 2.2 */
UCHAR_NUMERIC_TYPE,
/** Enumerated property Script.
- Same as uscript_getScript, returns UScriptCode values. @draft ICU 2.2 */
+ Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */
UCHAR_SCRIPT,
/** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
Returns UHangulSyllableType values. @draft ICU 2.6 */
UCHAR_HANGUL_SYLLABLE_TYPE,
- /** One more than the last constant for enumerated/integer Unicode properties. @draft ICU 2.2 */
+ /** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
UCHAR_INT_LIMIT,
/** Bitmask property General_Category_Mask.
@@ -953,33 +953,33 @@
/* New blocks in Unicode 3.2 */
- /** @draft ICU 2.2 */
+ /** @stable ICU 2.2 */
UBLOCK_CYRILLIC_SUPPLEMENTARY = 97, /*[0500]*/
- /** @draft ICU 2.2 */
+ /** @stable ICU 2.2 */
UBLOCK_TAGALOG = 98, /*[1700]*/
- /** @draft ICU 2.2 */
+ /** @stable ICU 2.2 */
UBLOCK_HANUNOO = 99, /*[1720]*/
- /** @draft ICU 2.2 */
+ /** @stable ICU 2.2 */
UBLOCK_BUHID = 100, /*[1740]*/
- /** @draft ICU 2.2 */
+ /** @stable ICU 2.2 */
UBLOCK_TAGBANWA = 101, /*[1760]*/
- /** @draft ICU 2.2 */
+ /** @stable ICU 2.2 */
UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/
- /** @draft ICU 2.2 */
+ /** @stable ICU 2.2 */
UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/
- /** @draft ICU 2.2 */
+ /** @stable ICU 2.2 */
UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/
- /** @draft ICU 2.2 */
+ /** @stable ICU 2.2 */
UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/
- /** @draft ICU 2.2 */
+ /** @stable ICU 2.2 */
UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/
- /** @draft ICU 2.2 */
+ /** @stable ICU 2.2 */
UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/
- /** @draft ICU 2.2 */
+ /** @stable ICU 2.2 */
UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
- /** @draft ICU 2.2 */
+ /** @stable ICU 2.2 */
UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/
- /** @draft ICU 2.2 */
+ /** @stable ICU 2.2 */
UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/
/* New blocks in Unicode 4 */
@@ -1030,7 +1030,7 @@
*
* @see UCHAR_EAST_ASIAN_WIDTH
* @see u_getIntPropertyValue
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
typedef enum UEastAsianWidth {
U_EA_NEUTRAL, /*[N]*/ /*See note !!*/
@@ -1087,7 +1087,7 @@
* Decomposition Type constants.
*
* @see UCHAR_DECOMPOSITION_TYPE
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
typedef enum UDecompositionType {
U_DT_NONE, /*[none]*/ /*See note !!*/
@@ -1115,7 +1115,7 @@
* Joining Type constants.
*
* @see UCHAR_JOINING_TYPE
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
typedef enum UJoiningType {
U_JT_NON_JOINING, /*[U]*/ /*See note !!*/
@@ -1131,7 +1131,7 @@
* Joining Group constants.
*
* @see UCHAR_JOINING_GROUP
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
typedef enum UJoiningGroup {
U_JG_NO_JOINING_GROUP,
@@ -1195,7 +1195,7 @@
* Line Break constants.
*
* @see UCHAR_LINE_BREAK
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
typedef enum ULineBreak {
U_LB_UNKNOWN, /*[XX]*/ /*See note !!*/
@@ -1236,7 +1236,7 @@
* Numeric Type constants.
*
* @see UCHAR_NUMERIC_TYPE
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
typedef enum UNumericType {
U_NT_NONE, /*[None]*/ /*See note !!*/
@@ -1392,7 +1392,7 @@
* @see u_getIntPropertyMinValue
* @see u_getIntPropertyMaxValue
* @see u_getUnicodeVersion
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
u_getIntPropertyValue(UChar32 c, UProperty which);
@@ -1413,7 +1413,7 @@
* @see u_getUnicodeVersion
* @see u_getIntPropertyMaxValue
* @see u_getIntPropertyValue
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
u_getIntPropertyMinValue(UProperty which);
@@ -1442,7 +1442,7 @@
* @see u_getUnicodeVersion
* @see u_getIntPropertyMaxValue
* @see u_getIntPropertyValue
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
u_getIntPropertyMaxValue(UProperty which);
@@ -1465,7 +1465,7 @@
* @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined.
*
* @see U_NO_NUMERIC_VALUE
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI double U_EXPORT2
u_getNumericValue(UChar32 c);
@@ -1475,7 +1475,7 @@
* no numeric value is defined for a code point.
*
* @see u_getNumericValue
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
#define U_NO_NUMERIC_VALUE ((double)-123456789.)
@@ -2118,7 +2118,7 @@
* length of the name.
* The length does not include the zero-termination.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
u_getISOComment(UChar32 c,
@@ -2681,7 +2681,7 @@
* length of the name.
* The length does not include the zero-termination.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
Index: uchriter.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uchriter.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- uchriter.h 10 Sep 2003 02:42:04 -0000 1.4
+++ uchriter.h 6 Apr 2004 10:08:07 -0000 1.5
@@ -345,18 +345,18 @@
virtual void getText(UnicodeString& result);
/**
- * Return a class ID for this object (not really public)
- * @return a class ID for this object.
+ * Return a class ID for this class (not really public)
+ * @return a class ID for this class
* @stable ICU 2.0
*/
- virtual UClassID getDynamicClassID(void) const;
+ static UClassID getStaticClassID(void);
/**
- * Return a class ID for this class (not really public)
- * @return a class ID for this class
+ * Return a class ID for this object (not really public)
+ * @return a class ID for this object.
* @stable ICU 2.0
*/
- static inline UClassID getStaticClassID(void);
+ virtual UClassID getDynamicClassID(void) const;
protected:
/**
@@ -370,17 +370,7 @@
*/
const UChar* text;
-private:
- static const char fgClassID;
};
-
-inline UClassID
-UCharCharacterIterator::getStaticClassID(void)
-{ return (UClassID)(&fgClassID); }
-
-inline UClassID
-UCharCharacterIterator::getDynamicClassID(void) const
-{ return UCharCharacterIterator::getStaticClassID(); }
U_NAMESPACE_END
#endif
Index: uclean.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uclean.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- uclean.h 10 Sep 2003 02:42:04 -0000 1.4
+++ uclean.h 6 Apr 2004 10:08:07 -0000 1.5
@@ -33,6 +33,10 @@
* application must guarantee that the first call to u_init() happen
* without contention, in a single thread only.
* <p>
+ * If <code>u_setMemoryFunctions()</code> or
+ * <code>u_setMutexFunctions</code> are needed (uncommon), they must be
+ * called _before_ <code>u_init()</code>.
+ * <p>
* Extra, repeated, or otherwise unneeded calls to u_init() do no harm,
* other than taking a small amount of time.
*
@@ -85,5 +89,153 @@
*/
U_CAPI void U_EXPORT2
u_cleanup(void);
+
+
+
+
+/**
+ * An opaque pointer type that represents an ICU mutex.
+ * For user-implemented mutexes, the value will typically point to a
+ * struct or object that implements the mutex.
+ * @draft ICU 2.8
+ * @system
+ */
+typedef void *UMTX;
+
+/**
+ * Function Pointer type for a user supplied mutex initialization function.
+ * The user-supplied function will be called by ICU whenever ICU needs to create a
+ * new mutex. The function implementation should create a mutex, and store a pointer
+ * to something that uniquely identifies the mutex into the UMTX that is supplied
+ * as a parameter.
+ * @param context user supplied value, obtained from u_setMutexFunctions().
+ * @param mutex Receives a pointer that identifies the new mutex.
+ * The mutex init function must set the UMTX to a non-null value.
+ * Subsequent calls by ICU to lock, unlock, or destroy a mutex will
+ * identify the mutex by the UMTX value.
+ * @param status Error status. Report errors back to ICU by setting this variable
+ * with an error code.
+ * @draft ICU 2.8
+ * @system
+ */
+typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX *mutex, UErrorCode* status);
+
+
+/**
+ * Function Pointer type for user supplied mutex functions.
+ * One of the user-supplied functions with this signature will be called by ICU
+ * whenever ICU needs to lock, unlock, or destroy a mutex.
+ * @param context user supplied value, obtained from u_setMutexFunctions().
+ * @param mutex specify the mutex on which to operate.
+ * @draft ICU 2.8
+ * @system
+ */
+typedef void U_CALLCONV UMtxFn (const void *context, UMTX *mutex);
+
+
+/**
+ * Set the functions that ICU will use for mutex operations
+ * Use of this function is optional; by default (without this function), ICU will
+ * directly access system functions for mutex operations
+ * This function can only be used when ICU is in an initial, unused state, before
+ * u_init() has been called.
+ * This function may be used even when ICU has been built without multi-threaded
+ * support (see ICU_USE_THREADS pre-processor variable, umutex.h)
+ * @param context This pointer value will be saved, and then (later) passed as
+ * a parameter to the user-supplied mutex functions each time they
+ * are called.
+ * @param init Pointer to a mutex initialization function. Must be non-null.
+ * @param destroy Pointer to the mutex destroy function. Must be non-null.
+ * @param lock pointer to the mutex lock function. Must be non-null.
+ * @param unlock Pointer to the mutex unlock function. Must be non-null.
+ * @param status Receives error values.
+ * @draft ICU 2.8
+ * @system
+ */
+U_CAPI void U_EXPORT2
+u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtxFn *lock, UMtxFn *unlock,
+ UErrorCode *status);
+
+
+/**
+ * Pointer type for a user supplied atomic increment or decrement function.
+ * @param context user supplied value, obtained from u_setAtomicIncDecFunctions().
+ * @param p Pointer to a 32 bit int to be incremented or decremented
+ * @return The value of the variable after the inc or dec operation.
+ * @draft ICU 2.8
+ * @system
+ */
+typedef int32_t U_CALLCONV UMtxAtomicFn(const void *context, int32_t *p);
+
+/**
+ * Set the functions that ICU will use for atomic increment and decrement of int32_t values.
+ * Use of this function is optional; by default (without this function), ICU will
+ * use its own internal implementation of atomic increment/decrement.
+ * This function can only be used when ICU is in an initial, unused state, before
+ * u_init() has been called.
+ * @param context This pointer value will be saved, and then (later) passed as
+ * a parameter to the increment and decrement functions each time they
+ * are called.
+ * @param inc Pointer to a function to do an atomic increment operation. Must be non-null.
+ * @param dec Pointer to a function to do an atomic decrement operation. Must be non-null.
+ * @param status Receives error values.
+ * @draft ICU 2.8
+ * @system
+ */
+U_CAPI void U_EXPORT2
+u_setAtomicIncDecFunctions(const void *context, UMtxAtomicFn *inc, UMtxAtomicFn *dec,
+ UErrorCode *status);
+
+
+
+/**
+ * Pointer type for a user supplied memory allocation function.
+ * @param context user supplied value, obtained from u_setMemoryFunctions().
+ * @param size The number of bytes to be allocated
+ * @return Pointer to the newly allocated memory, or NULL if the allocation failed.
+ * @draft ICU 2.8
+ * @system
+ */
+typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size);
+/**
+ * Pointer type for a user supplied memory re-allocation function.
+ * @param context user supplied value, obtained from u_setMemoryFunctions().
+ * @param mem Pointer to the memory block to be resized
+ * @param size The new size for the block
+ * @return Pointer to the resized memory block, or NULL if the resizing failed.
+ * @draft ICU 2.8
+ * @system
+ */
+typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t size);
+/**
+ * Pointer type for a user supplied memory free function. Behavior should be
+ * similar to the standard C library free().
+ * @param context user supplied value, obtained from u_setMemoryFunctions().
+ * @param mem Pointer to the memory block to be freed
+ * @draft ICU 2.8
+ * @system
+ */
+typedef void U_CALLCONV UMemFreeFn (const void *context, void *mem);
+
+/**
+ * Set the functions that ICU will use for memory allocation.
+ * Use of this function is optional; by default (without this function), ICU will
+ * use the standard C library malloc() and free() functions.
+ * This function can only be used when ICU is in an initial, unused state, before
+ * u_init() has been called.
+ * @param context This pointer value will be saved, and then (later) passed as
+ * a parameter to the memory functions each time they
+ * are called.
+ * @param a Pointer to a user-supplied malloc function.
+ * @param r Pointer to a user-supplied realloc function.
+ * @param f Pointer to a user-supplied free function.
+ * @param status Receives error values.
+ * @draft ICU 2.8
+ * @system
+ */
+U_CAPI void U_EXPORT2
+u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f,
+ UErrorCode *status);
#endif
Index: ucnv.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/ucnv.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- ucnv.h 10 Sep 2003 02:42:04 -0000 1.4
+++ ucnv.h 6 Apr 2004 10:08:07 -0000 1.5
@@ -414,6 +414,9 @@
* stored in the converter cache or the alias table. The only way to open further converters
* is call this function multiple times, or use the ucnv_safeClone() function to clone a
* 'master' converter.</p>
+ *
+ * <p>A future version of ICU may add alias table lookups and/or caching
+ * to this function.</p>
*
* <p>Example Use:
* <code>cnv = ucnv_openPackage("myapp", "myconverter", &err);</code>
@@ -427,7 +430,7 @@
* @see ucnv_open
* @see ucnv_safeClone
* @see ucnv_close
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI UConverter* U_EXPORT2
ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err);
@@ -454,8 +457,13 @@
int32_t *pBufferSize,
UErrorCode *status);
-/** @stable ICU 2.0 */
-#define U_CNV_SAFECLONE_BUFFERSIZE 3072
+/**
+ * \def U_CNV_SAFECLONE_BUFFERSIZE
+ * Definition of a buffer size that is designed to be large enough for
+ * converters to be cloned with ucnv_safeClone().
+ * @stable ICU 2.0
+ */
+#define U_CNV_SAFECLONE_BUFFERSIZE 1024
/**
* Deletes the unicode converter and releases resources associated
@@ -580,10 +588,50 @@
ucnv_resetFromUnicode(UConverter *converter);
/**
- * Returns the maximum length of bytes used by a character. This varies
- * between 1 and 4
- * @param converter the Unicode converter
- * @return the maximum number of bytes allowed by this particular converter
+ * Returns the maximum number of bytes that are output per UChar in conversion
+ * from Unicode using this converter.
+ * The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING
+ * to calculate the size of a target buffer for conversion from Unicode.
+ *
+ * Note: Before ICU 2.8, this function did not return reliable numbers for
+ * some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS.
+ *
+ * This number may not be the same as the maximum number of bytes per
+ * "conversion unit". In other words, it may not be the intuitively expected
+ * number of bytes per character that would be published for a charset,
+ * and may not fulfill any other purpose than the allocation of an output
+ * buffer of guaranteed sufficient size for a given input length and converter.
+ *
+ * Examples for special cases that are taken into account:
+ * - Supplementary code points may convert to more bytes than BMP code points.
+ * This function returns bytes per UChar (UTF-16 code unit), not per
+ * Unicode code point, for efficient buffer allocation.
+ * - State-shifting output (SI/SO, escapes, etc.) from stateful converters.
+ * - When m input UChars are converted to n output bytes, then the maximum m/n
+ * is taken into account (the relevant ratio is n/m, bytes per UChar).
+ *
+ * The number returned here does not take into account
+ * (see UCNV_GET_MAX_BYTES_FOR_STRING):
+ * - callbacks which output more than one charset character sequence per call,
+ * like escape callbacks
+ * - initial and final non-character bytes that are output by some converters
+ * (automatic BOMs, initial escape sequence, final SI, etc.)
+ *
+ * Examples for returned values:
+ * - SBCS charsets: 1
+ * - Shift-JIS: 2
+ * - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted)
+ * - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_)
+ * - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS)
+ * - ISO-2022: 3 (always outputs UTF-8)
+ * - ISO-2022-JP: 6 (4-byte escape sequences + DBCS)
+ * - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS)
+ *
+ * @param converter The Unicode converter.
+ * @return The maximum number of bytes per UChar that are output by ucnv_fromUnicode(),
+ * to be used together with UCNV_GET_MAX_BYTES_FOR_STRING for buffer allocation.
+ *
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
* @see ucnv_getMinCharSize
* @stable ICU 2.0
*/
@@ -591,8 +639,30 @@
ucnv_getMaxCharSize(const UConverter *converter);
/**
+ * Calculates the size of a buffer for conversion from Unicode to a charset.
+ * The calculated size is guaranteed to be sufficient for this conversion.
+ *
+ * It takes into account initial and final non-character bytes that are output
+ * by some converters.
+ * It does not take into account callbacks which output more than one charset
+ * character sequence per call, like escape callbacks.
+ * The default (substitution) callback only outputs one charset character sequence.
+ *
+ * @param length Number of UChars to be converted.
+ * @param maxCharSize Return value from ucnv_getMaxCharSize() for the converter
+ * that will be used.
+ * @return Size of a buffer that will be large enough to hold the output bytes of
+ * converting length UChars with the converter that returned the maxCharSize.
+ *
+ * @see ucnv_getMaxCharSize
+ * @draft ICU 2.8
+ */
+#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \
+ (((int32_t)(length)+10)*(int32_t)(maxCharSize))
+
+/**
* Returns the minimum byte length for characters in this codepage.
- * This is either 1 or 2 for all supported codepages.
+ * This is usually either 1 or 2.
* @param converter the Unicode converter
* @return the minimum number of bytes allowed by this particular converter
* @see ucnv_getMaxCharSize
@@ -856,6 +926,12 @@
* consumed. At that point, the caller should reset the source and
* sourceLimit pointers to point to the next chunk.
*
+ * At the end of the stream (flush==TRUE), the input is completely consumed
+ * when *source==sourceLimit and no error code is set.
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
+ *
* This is a <I>stateful</I> conversion. Additionally, even when all source data has
* been consumed, some data may be in the converters' internal state.
* Call this function repeatedly, updating the target pointers with
@@ -918,6 +994,12 @@
* returned, it means that all of the source buffer has been
* consumed. At that point, the caller should reset the source and
* sourceLimit pointers to point to the next chunk.
+ *
+ * At the end of the stream (flush==TRUE), the input is completely consumed
+ * when *source==sourceLimit and no error code is set.
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
*
* This is a <I>stateful</I> conversion. Additionally, even when all source data has
* been consumed, some data may be in the converters' internal state.
@@ -970,7 +1052,7 @@
* It is only useful for whole strings, not for streaming conversion.
*
* The maximum output buffer capacity required (barring output from callbacks) will be
- * srcLength*ucnv_getMaxCharSize(cnv).
+ * UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)).
*
* @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called)
* @param src the input Unicode string
@@ -986,6 +1068,7 @@
* and a buffer of the indicated length would need to be passed in
* @see ucnv_fromUnicode
* @see ucnv_convert
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
* @stable ICU 2.0
*/
U_CAPI int32_t U_EXPORT2
@@ -1027,11 +1110,39 @@
UErrorCode *pErrorCode);
/**
- * Will convert a codepage buffer into unicode one character at a time.
- * <p>This function was written to be efficient when transcoding small
- * amounts of data at a time.
- * In that case it will be more efficient than \Ref{ucnv_toUnicode}.
- * When converting large buffers use \Ref{ucnv_toUnicode}.</p>
+ * Convert a codepage buffer into Unicode one character at a time.
+ * The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set.
+ *
+ * Advantage compared to ucnv_toUnicode() or ucnv_toUChars():
+ * - Faster for small amounts of data, for most converters, e.g.,
+ * US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets.
+ * (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants,
+ * it uses ucnv_toUnicode() internally.)
+ * - Convenient.
+ *
+ * Limitations compared to ucnv_toUnicode():
+ * - Always assumes flush=TRUE.
+ * This makes ucnv_getNextUChar() unsuitable for "streaming" conversion,
+ * that is, for where the input is supplied in multiple buffers,
+ * because ucnv_getNextUChar() will assume the end of the input at the end
+ * of the first buffer.
+ * - Does not provide offset output.
+ *
+ * It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because
+ * ucnv_getNextUChar() uses the current state of the converter
+ * (unlike ucnv_toUChars() which always resets first).
+ * However, if ucnv_getNextUChar() is called after ucnv_toUnicode()
+ * stopped in the middle of a character sequence (with flush=FALSE),
+ * then ucnv_getNextUChar() will always use the slower ucnv_toUnicode()
+ * internally until the next character boundary.
+ * (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to
+ * start at a character boundary.)
+ *
+ * Instead of using ucnv_getNextUChar(), it is recommended
+ * to convert using ucnv_toUnicode() or ucnv_toUChars()
+ * and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h)
+ * or a C++ CharacterIterator or similar.
+ * This allows streaming conversion and offset output, for example.
*
* <p>Handling of surrogate pairs and supplementary-plane code points:<br>
* There are two different kinds of codepages that provide mappings for surrogate characters:
@@ -1492,7 +1603,7 @@
* @see ucnv_getStandardName
* @see uenum_close
* @see uenum_next
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI UEnumeration * U_EXPORT2
ucnv_openStandardNames(const char *convName,
Index: ucnv_err.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/ucnv_err.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- ucnv_err.h 10 Sep 2003 02:42:04 -0000 1.4
+++ ucnv_err.h 6 Apr 2004 10:08:08 -0000 1.5
@@ -172,7 +172,7 @@
by the new converter, the callback must clone
the data and call ucnv_setFromUCallback
(or setToUCallback) with the correct pointer.
- @draft ICU 2.2
+ @stable ICU 2.2
*/
} UConverterCallbackReason;
Index: uenum.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uenum.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- uenum.h 10 Sep 2003 02:42:04 -0000 1.1
+++ uenum.h 6 Apr 2004 10:08:08 -0000 1.2
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2002, International Business Machines
+* Copyright (C) 2002-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -22,10 +22,10 @@
/**
* An enumeration object.
* For usage in C programs.
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
struct UEnumeration;
-/** structure representing an enumeration object instance @draft ICU 2.2 */
+/** structure representing an enumeration object instance @stable ICU 2.2 */
typedef struct UEnumeration UEnumeration;
/**
@@ -33,7 +33,7 @@
* does nothing. After this call, any char* or UChar* pointer
* returned by uenum_unext() or uenum_next() is invalid.
* @param en UEnumeration structure pointer
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI void U_EXPORT2
uenum_close(UEnumeration* en);
@@ -50,7 +50,7 @@
* @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the
* iterator is out of sync.
* @return number of elements in the iterator
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
uenum_count(UEnumeration* en, UErrorCode* status);
@@ -74,7 +74,7 @@
* until the next call to any uenum_... method, including
* uenum_next() or uenum_unext(). When all strings have been
* traversed, returns NULL.
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI const UChar* U_EXPORT2
uenum_unext(UEnumeration* en,
@@ -107,7 +107,7 @@
* until the next call to any uenum_... method, including
* uenum_next() or uenum_unext(). When all strings have been
* traversed, returns NULL.
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI const char* U_EXPORT2
uenum_next(UEnumeration* en,
@@ -121,7 +121,7 @@
* @param en the iterator object
* @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
* the iterator is out of sync with its service.
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI void U_EXPORT2
uenum_reset(UEnumeration* en, UErrorCode* status);
Index: uidna.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uidna.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- uidna.h 10 Sep 2003 02:42:04 -0000 1.1
+++ uidna.h 6 Apr 2004 10:08:08 -0000 1.2
@@ -44,7 +44,7 @@
* once.
* ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
* ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string).
- *\end_file
+ *
*/
/**
@@ -84,18 +84,18 @@
* @param destCapacity Size of dest.
* @param options A bit set of options:
*
- * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
- * and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
- * U_UNASSIGNED_CODE_POINT_FOUND error code.
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_UNASSIGNED_ERROR error code.
*
- * - UIDNA_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
- * are treated as normal Unicode code points.
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points in the input
+ * are treated as normal Unicode code points.
*
- * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
- * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
*
* @param parseError Pointer to UParseError struct to receive information on position
* of error if an error is encountered. Can be NULL.
@@ -129,23 +129,23 @@
* @param destCapacity Size of dest.
* @param options A bit set of options:
*
- * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
- * and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
- * U_UNASSIGNED_CODE_POINT_FOUND error code.
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_UNASSIGNED_ERROR error code.
*
- * - UIDNA_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
- * are treated as normal Unicode code points. <b> Note: </b> This option is
- * required on toUnicode operation because the RFC mandates
- * verification of decoded ACE input by applying toASCII and comparing
- * its output with source
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points in the input
+ * are treated as normal Unicode code points. <b> Note: </b> This option is
+ * required on toUnicode operation because the RFC mandates
+ * verification of decoded ACE input by applying toASCII and comparing
+ * its output with source
*
*
*
- * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
- * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
*
* @param parseError Pointer to UParseError struct to receive information on position
* of error if an error is encountered. Can be NULL.
@@ -184,18 +184,18 @@
* @param destCapacity Size of dest.
* @param options A bit set of options:
*
- * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
- * and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
- * U_UNASSIGNED_CODE_POINT_FOUND error code.
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_UNASSIGNED_CODE_POINT_FOUND error code.
*
- * - UIDNA_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
- * are treated as normal Unicode code points.
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points in the input
+ * are treated as normal Unicode code points.
*
- * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
- * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
*
* @param parseError Pointer to UParseError struct to receive information on position
* of error if an error is encountered. Can be NULL.
@@ -230,18 +230,18 @@
* @param destCapacity Size of dest.
* @param options A bit set of options:
*
- * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
- * and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
- * U_UNASSIGNED_CODE_POINT_FOUND error code.
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_UNASSIGNED_CODE_POINT_FOUND error code.
*
- * - UIDNA_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
- * are treated as normal Unicode code points.
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points in the input
+ * are treated as normal Unicode code points.
*
- * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
- * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
*
* @param parseError Pointer to UParseError struct to receive information on position
* of error if an error is encountered. Can be NULL.
@@ -262,7 +262,7 @@
UErrorCode* status);
/**
- * Compare two strings for IDNs for equivalence.
+ * Compare two IDN strings for equivalence.
* This function splits the domain names into labels and compares them.
* According to IDN RFC, whenever two labels are compared, they are
* considered equal if and only if their ASCII forms (obtained by
@@ -277,18 +277,18 @@
* @param length2 Length of second source string, or -1 if NUL-terminated.
* @param options A bit set of options:
*
- * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
- * and do not use STD3 ASCII rules
- * If unassigned code points are found the operation fails with
- * U_UNASSIGNED_CODE_POINT_FOUND error code.
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_UNASSIGNED_CODE_POINT_FOUND error code.
*
- * - UIDNA_UNASSIGNED Unassigned values can be converted to ASCII for query operations
- * If this option is set, the unassigned code points are in the input
- * are treated as normal Unicode code points.
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points in the input
+ * are treated as normal Unicode code points.
*
- * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
- * If this option is set and the input does not satisfy STD3 rules,
- * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
*
* @param status ICU error code in/out parameter.
* Must fulfill U_SUCCESS before the function call.
Index: uiter.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uiter.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- uiter.h 10 Sep 2003 02:42:04 -0000 1.1
+++ uiter.h 6 Apr 2004 10:08:08 -0000 1.2
@@ -242,6 +242,10 @@
* to save and restore the iterator position more efficiently than with
* getIndex()/move().
*
+ * The iterator state is defined as a uint32_t value because it is designed
+ * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
+ * of the character iterator.
+ *
* With some UCharIterator implementations (e.g., UTF-8),
* getting and setting the UTF-16 index with existing functions
* (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
@@ -316,6 +320,15 @@
* Implementations of such C APIs are "callers" of UCharIterator functions;
* they only use the "public" function pointers and never access the "protected"
* fields directly.
+ *
+ * The current() and next() functions only check the current index against the
+ * limit, and previous() only checks the current index against the start,
+ * to see if the iterator already reached the end of the iteration range.
+ *
+ * The assumption - in all iterators - is that the index is moved via the API,
+ * which means it won't go out of bounds, or the index is modified by
+ * user code that knows enough about the iterator implementation to set valid
+ * index values.
*
* UCharIterator functions return code unit values 0..0xffff,
* or U_SENTINEL if the iteration bounds are reached.
Index: uloc.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uloc.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- uloc.h 10 Sep 2003 02:42:04 -0000 1.4
+++ uloc.h 6 Apr 2004 10:08:08 -0000 1.5
@@ -22,6 +22,7 @@
#define ULOC_H
#include "unicode/utypes.h"
+#include "unicode/uenum.h"
/**
* \file
@@ -247,6 +248,12 @@
*/
#define ULOC_LANG_CAPACITY 12
/**
+ * Useful constant for the maximum size of the script part of a locale ID
+ * (including the terminating NULL).
+ * @draft ICU 2.8
+ */
+#define ULOC_SCRIPT_CAPACITY 6
+/**
* Useful constant for the maximum size of the country part of a locale ID
* (including the terminating NULL).
* @stable ICU 2.0
@@ -257,8 +264,17 @@
* (including the terminating NULL).
* @stable ICU 2.0
*/
-#define ULOC_FULLNAME_CAPACITY 50
-
+#define ULOC_FULLNAME_CAPACITY 56
+/**
+ * Useful constant for the maximum size of keywords in a locale
+ * @draft ICU 2.8
+ */
+#define ULOC_KEYWORDS_CAPACITY 50
+/**
+ * Useful constant for the maximum size of keywords and their values in a locale
+ * @draft ICU 2.8
+ */
+#define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
/**
* Constants for *_getLocale()
@@ -275,21 +291,29 @@
* @stable ICU 2.1
*/
typedef enum {
- /** This is locale the data actually comes from */
+ /** This is the locale the data actually comes from
+ * @stable ICU 2.1
+ */
ULOC_ACTUAL_LOCALE = 0,
- /** This is the most specific locale supported by ICU */
+ /** This is the most specific locale supported by ICU
+ * @stable ICU 2.1
+ */
ULOC_VALID_LOCALE = 1,
- /** This is the requested locale */
+ /** This is the requested locale
+ * @deprecated ICU 2.8
+ */
ULOC_REQUESTED_LOCALE = 2,
ULOC_DATA_LOCALE_TYPE_LIMIT
} ULocDataLocaleType ;
/**
- * Gets ICU's default locale. This pointer and/or the contents of the pointer may
- * become invalid if the uloc_setDefault() is called, so copy the contents of the
- * pointer before calling uloc_setDefault().
- *
+ * Gets ICU's default locale.
+ * The returned string is a snapshot in time, and will remain valid
+ * and unchanged even when uloc_setDefault() is called.
+ * The returned storage is owned by ICU, and must not be altered or deleted
+ * by the caller.
+ *
* @return the ICU default locale
* @system
* @stable ICU 2.0
@@ -298,7 +322,15 @@
uloc_getDefault(void);
/**
- * Sets ICU's default locale. Call this once during setup or program initialization.
+ * Sets ICU's default locale.
+ * By default (without calling this function), ICU's default locale will be based
+ * on information obtained from the underlying system environment.
+ * <p>
+ * Changes to ICU's default locale do not propagate back to the
+ * system environment.
+ * <p>
+ * Changes to ICU's default locale do not affect any ICU services that
+ * may already be open based on the previous default locale value.
*
* @param localeID the new ICU default locale. A value of NULL will try to get
* the system's default locale.
@@ -313,12 +345,12 @@
/**
* Gets the language code for the specified locale.
*
- * @param localeID the locale to get the ISO langauge code with
- * @param language the langauge code for localeID
+ * @param localeID the locale to get the ISO language code with
+ * @param language the language code for localeID
* @param languageCapacity the size of the language buffer to store the
* language code with
- * @param err error information if retrieving the language code failed
- * @return the actual buffer size needed for the langauge code. If it's greater
+ * @param err error information if retrieving the language code failed
+ * @return the actual buffer size needed for the language code. If it's greater
* than languageCapacity, the returned language code will be truncated.
* @stable ICU 2.0
*/
@@ -329,6 +361,24 @@
UErrorCode* err);
/**
+ * Gets the script code for the specified locale.
+ *
+ * @param localeID the locale to get the script code with
+ * @param script the script code for localeID
+ * @param scriptCapacity the size of the script buffer to store the
+ * script code with
+ * @param err error information if retrieving the script code failed
+ * @return the actual buffer size needed for the script code. If it's greater
+ * than scriptCapacity, the returned script code will be truncated.
+ * @draft ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getScript(const char* localeID,
+ char* script,
+ int32_t scriptCapacity,
+ UErrorCode* err);
+
+/**
* Gets the country code for the specified locale.
*
* @param localeID the locale to get the country code with
@@ -363,6 +413,30 @@
char* variant,
int32_t variantCapacity,
UErrorCode* err);
+
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the ICU locale ID to
+ * a certain extent. Upper and lower case are set as needed.
+ * It does NOT map aliased names in any way.
+ * See the top of this header file.
+ * This API supports preflighting.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name fill in buffer for the name without keywords.
+ * @param nameCapacity capacity of the fill in buffer.
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getName(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
/**
* Gets the full name for the specified locale.
* Note: This has the effect of 'canonicalizing' the string to
@@ -381,7 +455,7 @@
* @stable ICU 2.0
*/
U_CAPI int32_t U_EXPORT2
-uloc_getName(const char* localeID,
+uloc_canonicalize(const char* localeID,
char* name,
int32_t nameCapacity,
UErrorCode* err);
@@ -389,8 +463,8 @@
/**
* Gets the ISO language code for the specified locale.
*
- * @param localeID the locale to get the ISO langauge code with
- * @return language the ISO langauge code for localeID
+ * @param localeID the locale to get the ISO language code with
+ * @return language the ISO language code for localeID
* @stable ICU 2.0
*/
U_CAPI const char* U_EXPORT2
@@ -409,6 +483,7 @@
/**
* Gets the Win32 LCID value for the specified locale.
+ * If the ICU locale is not recognized by Windows, 0 will be returned.
*
* @param localeID the locale to get the Win32 LCID value with
* @return country the Win32 LCID for localeID
@@ -420,34 +495,57 @@
/**
* Gets the language name suitable for display for the specified locale.
*
- * @param locale the locale to get the ISO langauge code with
- * @param inLocale Specifies the locale to be used to display the name. In other words,
+ * @param locale the locale to get the ISO language code with
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
* if the locale's language code is "en", passing Locale::getFrench() for
* inLocale would result in "Anglais", while passing Locale::getGerman()
* for inLocale would result in "Englisch".
- * @param language the displayable langauge code for localeID
+ * @param language the displayable language code for localeID
* @param languageCapacity the size of the language buffer to store the
* displayable language code with
* @param status error information if retrieving the displayable language code failed
- * @return the actual buffer size needed for the displayable langauge code. If it's greater
+ * @return the actual buffer size needed for the displayable language code. If it's greater
* than languageCapacity, the returned language code will be truncated.
* @stable ICU 2.0
*/
U_CAPI int32_t U_EXPORT2
uloc_getDisplayLanguage(const char* locale,
- const char* inLocale,
+ const char* displayLocale,
UChar* language,
int32_t languageCapacity,
UErrorCode* status);
/**
+ * Gets the script name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable script code with. NULL may be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "", while passing Locale::getGerman()
+ * for inLocale would result in "". NULL may be used to specify the default.
+ * @param script the displayable script code for localeID
+ * @param scriptCapacity the size of the script buffer to store the
+ * displayable script code with
+ * @param status error information if retrieving the displayable script code failed
+ * @return the actual buffer size needed for the displayable script code. If it's greater
+ * than scriptCapacity, the returned displayable script code will be truncated.
+ * @draft ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayScript(const char* locale,
+ const char* displayLocale,
+ UChar* script,
+ int32_t scriptCapacity,
+ UErrorCode* status);
+
+/**
* Gets the country name suitable for display for the specified locale.
*
- * @param locale the locale to get the displayable country code with
- * @param inLocale Specifies the locale to be used to display the name. In other words,
+ * @param locale the locale to get the displayable country code with. NULL may be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
* if the locale's language code is "en", passing Locale::getFrench() for
* inLocale would result in "Anglais", while passing Locale::getGerman()
- * for inLocale would result in "Englisch".
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
* @param country the displayable country code for localeID
* @param countryCapacity the size of the country buffer to store the
* displayable country code with
@@ -458,20 +556,20 @@
*/
U_CAPI int32_t U_EXPORT2
uloc_getDisplayCountry(const char* locale,
- const char* inLocale,
- UChar* country,
- int32_t countryCapacity,
- UErrorCode* status); /* NULL may be used to specify the default */
+ const char* displayLocale,
+ UChar* country,
+ int32_t countryCapacity,
+ UErrorCode* status);
/**
- * Gets the variant code suitable for display for the specified locale.
+ * Gets the variant name suitable for display for the specified locale.
*
- * @param locale the locale to get the displayable variant code with
- * @param inLocale Specifies the locale to be used to display the name. In other words,
+ * @param locale the locale to get the displayable variant code with. NULL may be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
* if the locale's language code is "en", passing Locale::getFrench() for
* inLocale would result in "Anglais", while passing Locale::getGerman()
- * for inLocale would result in "Englisch".
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
* @param variant the displayable variant code for localeID
* @param variantCapacity the size of the variant buffer to store the
* displayable variant code with
@@ -482,19 +580,92 @@
*/
U_CAPI int32_t U_EXPORT2
uloc_getDisplayVariant(const char* locale,
- const char* inLocale,
- UChar* variant,
- int32_t variantCapacity,
- UErrorCode* status); /* NULL may be used to specify the default */
+ const char* displayLocale,
+ UChar* variant,
+ int32_t variantCapacity,
+ UErrorCode* status);
/**
+ * Gets the keyword name suitable for display for the specified locale.
+ * E.g: for the locale string de_DE@collation=PHONEBOOK, this API gets the display
+ * string for the keyword collation.
+ * Usage:
+ * <code>
+ * UErrorCode status = U_ZERO_ERROR;
+ * const char* keyword =NULL;
+ * int32_t keywordLen = 0;
+ * int32_t keywordCount = 0;
+ * UChar displayKeyword[256];
+ * int32_t displayKeywordLen = 0;
+ * UEnumeration* keywordEnum = uloc_openKeywords("de_DE@collation=PHONEBOOK;calendar=TRADITIONAL", &status);
+ * for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){
+ * if(U_FAILURE(status)){
+ * ...something went wrong so handle the error...
+ * break;
+ * }
+ * // the uenum_next returns NUL terminated string
+ * keyword = uenum_next(keywordEnum, &keywordLen, &status);
+ * displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256, &status);
+ * ... do something interesting .....
+ * }
+ * uenum_close(keywordEnum);
+ * </code>
+ * @param keyword The keyword whose display string needs to be returned.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "Anglais", while passing Locale::getGerman()
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param dest the buffer to which the displayable keyword should be written.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param status error information if retrieving the displayable string failed.
+ * Should not be NULL and should not indicate failure on entry.
+ * @return the actual buffer size needed for the displayable keyword.
+ * @see #uloc_openKeywords
+ * @draft ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayKeyword(const char* keyword,
+ const char* displayLocale,
+ UChar* dest,
+ int32_t destCapacity,
+ UErrorCode* status);
+/**
+ * Gets the value of the keyword suitable for display for the specified locale.
+ * E.g: for the locale string de_DE@collation=PHONEBOOK, this API gets the display
+ * string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword.
+ *
+ * @param locale The locale to get the displayable keyword value with. NULL may be used to specify the default.
+ * @param keyword The keyword whose value should be used.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "Anglais", while passing Locale::getGerman()
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param dest the buffer to which the displayable keyword should be written.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param status error information if retrieving the displayable string failed.
+ * Should not be NULL and must not indicate failure on entry.
+ * @return the actual buffer size needed for the displayable keyword value.
+ * @draft ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayKeywordValue( const char* locale,
+ const char* keyword,
+ const char* displayLocale,
+ UChar* dest,
+ int32_t destCapacity,
+ UErrorCode* status);
+/**
* Gets the full name suitable for display for the specified locale.
*
- * @param localeID the locale to get the displayable name with
+ * @param localeID the locale to get the displayable name with. NULL may be used to specify the default.
* @param inLocaleID Specifies the locale to be used to display the name. In other words,
* if the locale's language code is "en", passing Locale::getFrench() for
* inLocale would result in "Anglais", while passing Locale::getGerman()
- * for inLocale would result in "Englisch".
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
* @param result the displayable name for localeID
* @param maxResultSize the size of the name buffer to store the
* displayable full name with
@@ -505,7 +676,7 @@
*/
U_CAPI int32_t U_EXPORT2
uloc_getDisplayName(const char* localeID,
- const char* inLocaleID, /* NULL may be used to specify the default */
+ const char* inLocaleID,
UChar* result,
int32_t maxResultSize,
UErrorCode* err);
@@ -574,6 +745,79 @@
char* parent,
int32_t parentCapacity,
UErrorCode* err);
+
+
+/**
+ * Character separating keywords from the locale string
+ * different for EBCDIC - TODO
+ * @draft ICU 2.8
+ */
+#define ULOC_KEYWORD_SEPARATOR '@'
+/**
+ * Character for assigning value to a keyword
+ * @draft ICU 2.8
+ */
+#define ULOC_KEYWORD_ASSIGN '='
+/**
+ * Character separating keywords
+ * @draft ICU 2.8
+ */
+#define ULOC_KEYWORD_ITEM_SEPARATOR ';'
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the string to
+ * a certain extent. Upper and lower case are set as needed,
+ * and if the components were in 'POSIX' format they are changed to
+ * ICU format. It does NOT map aliased names in any way.
+ * See the top of this header file.
+ * This API strips off the keyword part, so "de_DE@collation=phonebook"
+ * will become "de_DE".
+ * This API supports preflighting.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name fill in buffer for the name without keywords.
+ * @param nameCapacity capacity of the fill in buffer.
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @draft ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getBaseName(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets an enumeration of keywords for the specified locale. Enumeration
+ * must get disposed of by the client using uenum_close function.
+ *
+ * @param localeID the locale to get the keywords with
+ * @param status error information if retrieving the keywords failed
+ * @return enumeration of keywords or NULL if there are no keywords.
+ * @draft ICU 2.8
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uloc_openKeywords(const char* localeID,
+ UErrorCode* status);
+
+/**
+ * Get the value for a keyword. Locale name does not need to be normalized.
+ *
+ * @param localeID locale name containing the keyword ("de_DE@currency=EURO;collation=PHONEBOOK")
+ * @param keywordName name of the keyword for which we want the value. Case insensitive.
+ * @param buffer receiving buffer
+ * @param bufferCapacity capacity of receiving buffer
+ * @param status containing error code - buffer not big enough.
+ * @return the length of keyword value
+ * @draft ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getKeywordValue(const char* localeID,
+ const char* keywordName,
+ char* buffer, int32_t bufferCapacity,
+ UErrorCode* status);
/*eof*/
Index: umachine.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/umachine.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- umachine.h 10 Sep 2003 02:42:04 -0000 1.4
+++ umachine.h 6 Apr 2004 10:08:08 -0000 1.5
@@ -1,7 +1,7 @@
/*
******************************************************************************
*
-* Copyright (C) 1999-2002, International Business Machines
+* Copyright (C) 1999-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@@ -188,37 +188,36 @@
#endif
#if defined(U_INT64_T_UNAVAILABLE)
-# ifndef INTMAX_MIN
-# define INTMAX_MIN INT32_MIN
-# endif
-# ifndef INTMAX_MAX
-# define INTMAX_MAX INT32_MAX
-# endif
-# ifndef UINTMAX_MAX
-# define UINTMAX_MAX UINT32_MAX
-# endif
+# error int64_t is required for decimal format and rule-based number format.
#else
-# ifndef INT64_MIN
-/** The smallest value a 64 bit signed integer can hold @stable ICU 2.0 */
-# define INT64_MIN ((int64_t)(-9223372036854775807-1))
-# endif
-# ifndef INT64_MAX
-/** The largest value a 64 bit signed integer can hold @stable ICU 2.0 */
-# define INT64_MAX ((int64_t)(9223372036854775807))
-# endif
-# ifndef UINT64_MAX
-/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.0 */
-# define UINT64_MAX ((uint64_t)(18446744073709551615))
-# endif
-# ifndef INTMAX_MIN
-# define INTMAX_MIN INT64_MIN
-# endif
-# ifndef INTMAX_MAX
-# define INTMAX_MAX INT64_MAX
-# endif
-# ifndef UINTMAX_MAX
-# define UINTMAX_MAX UINT64_MAX
-# endif
+# ifndef INT64_C
+/**
+ * Provides a platform independent way to specify a signed 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
+ * @draft ICU 2.8
+ */
+# define INT64_C(c) c ## LL
+# endif
+# ifndef UINT64_C
+/**
+ * Provides a platform independent way to specify an unsigned 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
+ * @draft ICU 2.8
+ */
+# define UINT64_C(c) c ## ULL
+# endif
+# ifndef U_INT64_MIN
+/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
+# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1))
+# endif
+# ifndef U_INT64_MAX
+/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
+# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807)))
+# endif
+# ifndef U_UINT64_MAX
+/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
+# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615)))
+# endif
#endif
/*==========================================================================*/
Index: umisc.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/umisc.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- umisc.h 10 Sep 2003 02:42:04 -0000 1.3
+++ umisc.h 6 Apr 2004 10:08:08 -0000 1.4
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (C) 1999-2001, International Business Machines
+* Copyright (C) 1999-2003, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: umisc.h
Index: unifilt.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/unifilt.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- unifilt.h 10 Sep 2003 02:42:04 -0000 1.1
+++ unifilt.h 6 Apr 2004 10:08:08 -0000 1.2
@@ -1,5 +1,6 @@
/*
-* Copyright (C) 1999, International Business Machines Corporation and others. All Rights Reserved.
+* Copyright (C) 1999-2003, International Business Machines Corporation and others.
+* All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/17/99 aliu Creation.
@@ -85,40 +86,41 @@
* UnicodeFunctor API. Nothing to do.
* @draft ICU 2.4
*/
- virtual void setData(const TransliterationRuleData*) {}
+ virtual void setData(const TransliterationRuleData*);
/**
* ICU "poor man's RTTI", returns a UClassID for the actual class.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
- virtual inline UClassID getDynamicClassID() const = 0;
+ virtual UClassID getDynamicClassID() const = 0;
- /**
+ /*
* ICU "poor man's RTTI", returns a UClassID for this class.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
- static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
+/* static UClassID getStaticClassID() { return (UClassID)&fgClassID; }*/
protected:
- /**
+ /*
+ * Since this class has pure virtual functions,
+ * a constructor can't be used.
* @stable ICU 2.0
*/
- UnicodeFilter();
+/* UnicodeFilter();*/
private:
- /**
+ /*
* The address of this static class variable serves as this class's ID
* for ICU "poor man's RTTI".
*/
- static const char fgClassID;
+/* static const char fgClassID;*/
};
-inline UnicodeFilter::UnicodeFilter() {}
-inline UnicodeFilter::~UnicodeFilter() {}
+/*inline UnicodeFilter::UnicodeFilter() {}*/
U_NAMESPACE_END
Index: unifunct.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/unifunct.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- unifunct.h 10 Sep 2003 02:42:04 -0000 1.1
+++ unifunct.h 6 Apr 2004 10:08:08 -0000 1.2
@@ -27,7 +27,7 @@
*/
class U_COMMON_API UnicodeFunctor : public UObject {
- public:
+public:
/**
* Destructor
@@ -67,7 +67,7 @@
*/
virtual UnicodeReplacer* toReplacer() const;
- /**
+ /*
* Return the class ID for this class. This is useful only for
* comparing to a return value from getDynamicClassID(). For example:
* <pre>
@@ -78,7 +78,7 @@
* @return The class ID for all objects of this class.
* @stable ICU 2.0
*/
- static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
+ /*static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }*/
/**
* Returns a unique class ID <b>polymorphically</b>. This method
@@ -115,24 +115,25 @@
*/
virtual void setData(const TransliterationRuleData*) = 0;
- protected:
+protected:
/**
+ * Since this class has pure virtual functions,
+ * a constructor can't be used.
* @stable ICU 2.0
*/
- UnicodeFunctor();
+ /*UnicodeFunctor();*/
- private:
+private:
- /**
+ /*
* Class identifier for subclasses of UnicodeFunctor that do not
* define their class (anonymous subclasses).
*/
- static const char fgClassID;
+ /*static const char fgClassID;*/
};
-inline UnicodeFunctor::UnicodeFunctor() {}
-inline UnicodeFunctor::~UnicodeFunctor() {}
+/*inline UnicodeFunctor::UnicodeFunctor() {}*/
U_NAMESPACE_END
Index: unimatch.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/unimatch.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- unimatch.h 10 Sep 2003 02:42:04 -0000 1.1
+++ unimatch.h 6 Apr 2004 10:08:08 -0000 1.2
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2001, International Business Machines Corporation and others. All Rights Reserved.
+* Copyright (C) 2001-2003, International Business Machines Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 07/18/01 aliu Creation.
@@ -66,7 +66,7 @@
* Destructor.
* @draft ICU 2.4
*/
- virtual inline ~UnicodeMatcher() {};
+ virtual ~UnicodeMatcher();
/**
* Return a UMatchDegree value indicating the degree of match for
Index: uniset.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uniset.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- uniset.h 10 Sep 2003 02:42:04 -0000 1.1
+++ uniset.h 6 Apr 2004 10:08:08 -0000 1.2
@@ -22,7 +22,7 @@
class SymbolTable;
class UVector;
class CaseEquivClass;
-
+class RuleCharacterIterator;
/**
* A mutable set of Unicode characters and multicharacter strings. Objects of this class
@@ -282,13 +282,21 @@
* Minimum value that can be stored in a UnicodeSet.
* @draft ICU 2.4
*/
+#ifdef U_CYGWIN
+ static U_COMMON_API const UChar32 MIN_VALUE;
+#else
static const UChar32 MIN_VALUE;
+#endif
/**
* Maximum value that can be stored in a UnicodeSet.
* @draft ICU 2.4
*/
+#ifdef U_CYGWIN
+ static U_COMMON_API const UChar32 MAX_VALUE;
+#else
static const UChar32 MAX_VALUE;
+#endif
//----------------------------------------------------------------
// Constructors &c
@@ -329,12 +337,33 @@
* @param pattern a string specifying what characters are in the set
* @param options bitmask for options to apply to the pattern.
* Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param symbols a symbol table mapping variable names to values
+ * and stand-in characters to UnicodeSets; may be NULL
* @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
* contains a syntax error.
* @internal
*/
UnicodeSet(const UnicodeString& pattern,
uint32_t options,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+
+ /**
+ * Constructs a set from the given pattern. See the class description
+ * for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param pos on input, the position in pattern at which to start parsing.
+ * On output, the position after the last character parsed.
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param symbols a symbol table mapping variable names to values
+ * and stand-in characters to UnicodeSets; may be NULL
+ * @param status input-output error code
+ * @draft ICU 2.8
+ */
+ UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
+ uint32_t options,
+ const SymbolTable* symbols,
UErrorCode& status);
#ifdef U_USE_UNICODESET_DEPRECATES
@@ -432,6 +461,8 @@
* @param pattern a string specifying what characters are in the set
* @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
* contains a syntax error.
+ * <em> Empties the set passed before applying the pattern.</em>
+ * @return a reference to this
* @stable ICU 2.0
*/
virtual UnicodeSet& applyPattern(const UnicodeString& pattern,
@@ -444,12 +475,53 @@
* @param pattern a string specifying what characters are in the set
* @param options bitmask for options to apply to the pattern.
* Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param symbols a symbol table mapping variable names to
+ * values and stand-ins to UnicodeSets; may be NULL
* @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
* contains a syntax error.
+ *<em> Empties the set passed before applying the pattern.</em>
+ * @return a reference to this
* @internal
*/
UnicodeSet& applyPattern(const UnicodeString& pattern,
uint32_t options,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+
+ /**
+ * Parses the given pattern, starting at the given position. The
+ * character at pattern.charAt(pos.getIndex()) must be '[', or the
+ * parse fails. Parsing continues until the corresponding closing
+ * ']'. If a syntax error is encountered between the opening and
+ * closing brace, the parse fails. Upon return from a successful
+ * parse, the ParsePosition is updated to point to the character
+ * following the closing ']', and a StringBuffer containing a
+ * pairs list for the parsed pattern is returned. This method calls
+ * itself recursively to parse embedded subpatterns.
+ *<em> Empties the set passed before applying the pattern.</em>
+ *
+ * @param pattern the string containing the pattern to be parsed.
+ * The portion of the string from pos.getIndex(), which must be a
+ * '[', to the corresponding closing ']', is parsed.
+ * @param pos upon entry, the position at which to begin parsing.
+ * The character at pattern.charAt(pos.getIndex()) must be a '['.
+ * Upon return from a successful parse, pos.getIndex() is either
+ * the character after the closing ']' of the parsed pattern, or
+ * pattern.length() if the closing ']' is the last character of
+ * the pattern string.
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param symbols a symbol table mapping variable names to
+ * values and stand-ins to UnicodeSets; may be NULL
+ * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+ * contains a syntax error.
+ * @return a reference to this
+ * @draft ICU 2.8
+ */
+ UnicodeSet& applyPattern(const UnicodeString& pattern,
+ ParsePosition& pos,
+ uint32_t options,
+ const SymbolTable* symbols,
UErrorCode& status);
/**
@@ -1113,49 +1185,11 @@
const UnicodeString* getString(int32_t index) const;
-private:
-
- static const char fgClassID;
-
//----------------------------------------------------------------
// RuleBasedTransliterator support
//----------------------------------------------------------------
- friend class TransliteratorParser;
- friend class TransliteratorIDParser;
-
- friend class RBBIRuleScanner;
- friend class RegexCompile;
-
- /**
- * Constructs a set from the given pattern. See the class description
- * for the syntax of the pattern language.
-
- * @param pattern a string specifying what characters are in the set
- * @param pos on input, the position in pattern at which to start parsing.
- * On output, the position after the last character parsed.
- * @param varNameToChar a mapping from variable names (String) to characters
- * (Character). May be null. If varCharToSet is non-null, then names may
- * map to either single characters or sets, depending on whether a mapping
- * exists in varCharToSet. If varCharToSet is null then all names map to
- * single characters.
- * @param varCharToSet a mapping from characters (Character objects from
- * varNameToChar) to UnicodeSet objects. May be null. Is only used if
- * varNameToChar is also non-null.
- * @exception <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
- * contains a syntax error.
- */
- UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
- const SymbolTable& symbols,
- UErrorCode& status);
-
- /**
- * Constructs a set from the given pattern. Identical to the
- * 4-parameter ParsePosition contstructor, but does not take a
- * SymbolTable, and does not recognize embedded variables.
- */
- UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
- uint32_t options, UErrorCode& status);
+private:
/**
* Returns <tt>true</tt> if this set contains any character whose low byte
@@ -1170,35 +1204,11 @@
// Implementation: Pattern parsing
//----------------------------------------------------------------
- /**
- * Parses the given pattern, starting at the given position. The
- * character at pattern.charAt(pos.getIndex()) must be '[', or the
- * parse fails. Parsing continues until the corresponding closing
- * ']'. If a syntax error is encountered between the opening and
- * closing brace, the parse fails. Upon return from a successful
- * parse, the ParsePosition is updated to point to the character
- * following the closing ']', and a StringBuffer containing a
- * pairs list for the parsed pattern is returned. This method calls
- * itself recursively to parse embedded subpatterns.
- *
- * @param pattern the string containing the pattern to be parsed.
- * The portion of the string from pos.getIndex(), which must be a
- * '[', to the corresponding closing ']', is parsed.
- * @param pos upon entry, the position at which to being parsing.
- * The character at pattern.charAt(pos.getIndex()) must be a '['.
- * Upon return from a successful parse, pos.getIndex() is either
- * the character after the closing ']' of the parsed pattern, or
- * pattern.length() if the closing ']' is the last character of
- * the pattern string.
- * @return a StringBuffer containing a pairs list for the parsed
- * substring of <code>pattern</code>
- * @exception U_ILLEGAL_ARGUMENT_ERROR if the parse fails.
- */
- void applyPattern(const UnicodeString& pattern,
- ParsePosition& pos,
- uint32_t options,
+ void applyPattern(RuleCharacterIterator& chars,
const SymbolTable* symbols,
- UErrorCode& status);
+ UnicodeString& rebuiltPat,
+ uint32_t options,
+ UErrorCode& ec);
//----------------------------------------------------------------
// Implementation: Utility methods
@@ -1212,13 +1222,6 @@
UBool allocateStrings();
- void _applyPattern(const UnicodeString& pattern,
- ParsePosition& pos,
- uint32_t options,
- const SymbolTable* symbols,
- UnicodeString& rebuiltPat,
- UErrorCode& status);
-
UnicodeString& _toPattern(UnicodeString& result,
UBool escapeUnprintable) const;
@@ -1247,6 +1250,9 @@
static UBool resemblesPropertyPattern(const UnicodeString& pattern,
int32_t pos);
+ static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
+ int32_t iterOpts);
+
/**
* Parse the given property pattern at the given parse position
* and set this UnicodeSet to the result.
@@ -1289,6 +1295,10 @@
ParsePosition& ppos,
UErrorCode &ec);
+ void applyPropertyPattern(RuleCharacterIterator& chars,
+ UnicodeString& rebuiltPat,
+ UErrorCode& ec);
+
/**
* A filter that returns TRUE if the given code point should be
* included in the UnicodeSet being constructed.
@@ -1329,14 +1339,6 @@
static const CaseEquivClass* getCaseMapOf(UChar folded);
};
-
-inline UClassID
-UnicodeSet::getStaticClassID(void)
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-UnicodeSet::getDynamicClassID(void) const
-{ return UnicodeSet::getStaticClassID(); }
inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
return !operator==(o);
Index: unistr.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/unistr.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- unistr.h 10 Sep 2003 02:42:04 -0000 1.5
+++ unistr.h 6 Apr 2004 10:08:08 -0000 1.6
@@ -21,8 +21,6 @@
#ifndef UNISTR_H
#define UNISTR_H
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
#include "unicode/rep.h"
struct UConverter; // unicode/ucnv.h
@@ -33,7 +31,7 @@
/**
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
* Compare strings in code point order instead of code unit order.
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
#define U_COMPARE_CODE_POINT_ORDER 0x8000
#endif
@@ -47,7 +45,6 @@
U_NAMESPACE_BEGIN
class Locale; // unicode/locid.h
-class UCharReference;
class StringCharacterIterator;
class BreakIterator; // unicode/brkiter.h
@@ -1470,8 +1467,10 @@
/**
* Return the length of the UnicodeString object.
- * The length is the number of characters in the text.
+ * The length is the number of UChar code units in the UnicodeString.
+ * If you want the number of code points, please use countChar32().
* @return the length of the UnicodeString object
+ * @see countChar32
* @stable ICU 2.0
*/
inline int32_t length(void) const;
@@ -1486,6 +1485,7 @@
* @param start the index of the first code unit to check
* @param length the number of UChar code units to check
* @return the number of code points in the specified code units
+ * @see length
* @stable ICU 2.0
*/
int32_t
@@ -1622,7 +1622,7 @@
* @param srcStart the offset into <TT>srcText</TT> where new characters
* will be obtained
* @return a reference to this
- * @draft ICU2.2
+ * @stable ICU 2.2
*/
inline UnicodeString& setTo(const UnicodeString& srcText,
int32_t srcStart);
@@ -2366,6 +2366,7 @@
* that are to be titlecased.
* If none is provided (0), then a standard titlecase
* break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
* @return A reference to this.
* @stable ICU 2.1
*/
@@ -2393,6 +2394,7 @@
* that are to be titlecased.
* If none is provided (0), then a standard titlecase
* break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
* @param locale The locale to consider.
* @return A reference to this.
* @stable ICU 2.1
@@ -2548,7 +2550,7 @@
*
* @see getBuffer(int32_t minCapacity)
* @see getBuffer()
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
inline const UChar *getTerminatedBuffer();
@@ -2717,7 +2719,7 @@
* 'Substring' constructor from tail of source string.
* @param src The UnicodeString object to copy.
* @param srcStart The offset into <tt>src</tt> at which to start copying.
- * @draft ICU2.2
+ * @stable ICU 2.2
*/
UnicodeString(const UnicodeString& src, int32_t srcStart);
@@ -2726,7 +2728,7 @@
* @param src The UnicodeString object to copy.
* @param srcStart The offset into <tt>src</tt> at which to start copying.
* @param srcLength The number of characters from <tt>src</tt> to copy.
- * @draft ICU2.2
+ * @stable ICU 2.2
*/
UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
@@ -2814,18 +2816,18 @@
UChar32 unescapeAt(int32_t &offset) const;
/**
- * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * ICU "poor man's RTTI", returns a UClassID for this class.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
- virtual inline UClassID getDynamicClassID() const;
+ static UClassID getStaticClassID();
/**
- * ICU "poor man's RTTI", returns a UClassID for this class.
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
- static inline UClassID getStaticClassID();
+ virtual UClassID getDynamicClassID() const;
//========================================
// Implementation methods
@@ -3067,15 +3069,23 @@
uint16_t fFlags; // bit flags: see constants above
UChar fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings
- /**
- * The address of this static class variable serves as this class's ID
- * for ICU "poor man's RTTI".
- */
- static const char fgClassID;
};
+/**
+ * Create a new UnicodeString with the concatenation of two others.
+ *
+ * @param s1 The first string to be copied to the new one.
+ * @param s2 The second string to be copied to the new one, after s1.
+ * @return UnicodeString(s1).append(s2)
+ * @draft ICU 2.8
+ */
+U_COMMON_API UnicodeString
+operator+ (const UnicodeString &s1, const UnicodeString &s2);
+
U_NAMESPACE_END
+// inline implementations -------------------------------------------------- ***
+
//========================================
// Array copying
//========================================
@@ -3145,14 +3155,6 @@
//========================================
// Read-only implementation methods
//========================================
-inline UClassID
-UnicodeString::getStaticClassID()
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-UnicodeString::getDynamicClassID() const
-{ return UnicodeString::getStaticClassID(); }
-
inline int32_t
UnicodeString::length() const
{ return fLength; }
@@ -4015,7 +4017,6 @@
UnicodeString::reverse(int32_t start,
int32_t _length)
{ return doReverse(start, _length); }
-
U_NAMESPACE_END
Index: unorm.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/unorm.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- unorm.h 10 Sep 2003 02:42:04 -0000 1.4
+++ unorm.h 6 Apr 2004 10:08:08 -0000 1.5
@@ -271,7 +271,7 @@
* "mode" normalization form.
*
* @see unorm_quickCheck
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI UBool U_EXPORT2
unorm_isNormalized(const UChar *src, int32_t srcLength,
@@ -460,14 +460,14 @@
/**
* Option bit for unorm_compare:
* Both input strings are assumed to fulfill FCD conditions.
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
#define UNORM_INPUT_IS_FCD 0x20000
/**
* Option bit for unorm_compare:
* Perform case-insensitive comparison.
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
#define U_COMPARE_IGNORE_CASE 0x10000
@@ -476,7 +476,7 @@
/**
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
* Compare strings in code point order instead of code unit order.
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
#define U_COMPARE_CODE_POINT_ORDER 0x8000
#endif
@@ -562,7 +562,7 @@
* @see u_strCompare
* @see u_strCaseCompare
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
unorm_compare(const UChar *s1, int32_t length1,
Index: uobject.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uobject.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- uobject.h 10 Sep 2003 02:42:04 -0000 1.1
+++ uobject.h 6 Apr 2004 10:08:09 -0000 1.2
@@ -34,7 +34,7 @@
* applications that statically link the C Runtime library, meaning that
* the app and ICU will be using different heaps.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
#ifndef U_OVERRIDE_CXX_ALLOCATION
#define U_OVERRIDE_CXX_ALLOCATION 1
@@ -151,23 +151,23 @@
* This is because some compilers do not support covariant (same-as-this)
* return types; cast to the appropriate subclass if necessary.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
class U_COMMON_API UObject : public UMemory {
public:
/**
* Destructor.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
- virtual inline ~UObject() {}
+ virtual ~UObject();
/**
* ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
- virtual inline UClassID getDynamicClassID() const = 0;
+ virtual UClassID getDynamicClassID() const = 0;
protected:
// the following functions are protected to prevent instantiation and
@@ -181,7 +181,7 @@
// commented out because UObject is abstract (see getDynamicClassID)
// inline UObject(const UObject &other) {}
-#if U_ICU_VERSION_MAJOR_NUM>2 || (U_ICU_VERSION_MAJOR_NUM==2 && U_ICU_VERSION_MINOR_NUM>6)
+#if U_ICU_VERSION_MAJOR_NUM>2
// TODO post ICU 2.4 (This comment inserted in 2.2)
// some or all of the following "boilerplate" functions may be made public
// in a future ICU4C release when all subclasses implement them
@@ -213,6 +213,23 @@
UObject &UObject::operator=(const UObject &);
*/
};
+
+/**
+ * This is a simple macro to add ICU RTTI to an ICU object implementation.
+ * This does not go into the header. This should only be used in *.cpp files.
+ *
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \
+ UClassID myClass::getStaticClassID() { \
+ static const char classID = 0; \
+ return (UClassID)&classID; \
+ } \
+ UClassID myClass::getDynamicClassID() const \
+ { return myClass::getStaticClassID(); }
+
+
U_NAMESPACE_END
Index: urename.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/urename.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- urename.h 10 Sep 2003 02:42:04 -0000 1.5
+++ urename.h 6 Apr 2004 10:08:09 -0000 1.6
@@ -29,1358 +29,1377 @@
/* C exports renaming data */
-#define T_CString_integerToString T_CString_integerToString_2_6
-#define T_CString_stricmp T_CString_stricmp_2_6
-#define T_CString_stringToInteger T_CString_stringToInteger_2_6
-#define T_CString_strnicmp T_CString_strnicmp_2_6
-#define T_CString_toLowerCase T_CString_toLowerCase_2_6
-#define T_CString_toUpperCase T_CString_toUpperCase_2_6
-#define T_FileStream_close T_FileStream_close_2_6
-#define T_FileStream_eof T_FileStream_eof_2_6
[...2693 lines suppressed...]
+#define UnicodeFilter UnicodeFilter_2_8
+#define UnicodeFunctor UnicodeFunctor_2_8
+#define UnicodeMatcher UnicodeMatcher_2_8
+#define UnicodeNameTransliterator UnicodeNameTransliterator_2_8
+#define UnicodeReplacer UnicodeReplacer_2_8
+#define UnicodeSet UnicodeSet_2_8
+#define UnicodeSetIterator UnicodeSetIterator_2_8
+#define UnicodeString UnicodeString_2_8
+#define UnicodeToHexTransliterator UnicodeToHexTransliterator_2_8
+#define UppercaseTransliterator UppercaseTransliterator_2_8
+#define ValueRecord ValueRecord_2_8
+#define ValueRuns ValueRuns_2_8
+#define locale_set_default_internal locale_set_default_internal_2_8
+#define util64_fromDouble util64_fromDouble_2_8
+#define util64_pow util64_pow_2_8
+#define util64_tou util64_tou_2_8
+#define util64_utoi util64_utoi_2_8
#endif
#endif
Index: ures.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/ures.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- ures.h 10 Sep 2003 02:42:05 -0000 1.5
+++ ures.h 6 Apr 2004 10:08:09 -0000 1.6
@@ -83,6 +83,14 @@
URES_ALIAS=3,
/**
+ * Internal use only.
+ * Alternative resource type constant for tables of key-value pairs.
+ * Never returned by ures_getType().
+ * @internal
+ */
+ URES_TABLE32=4,
+
+ /**
* Resource type constant for a single 28-bit integer, interpreted as
* signed or unsigned by the ures_getInt() or ures_getUInt() function.
* @see ures_getInt
@@ -214,7 +222,7 @@
* <STRONG>Tables</STRONG>: returns the number of resources in the table
* <STRONG>single string</STRONG>: returns 1
*@see ures_getSize
- * @stable ICU 2.0
+ * @deprecated ICU 2.8 Use ures_getSize instead.
*/
U_CAPI int32_t U_EXPORT2
ures_countArrayItems(const UResourceBundle* resourceBundle,
@@ -239,7 +247,7 @@
* @return A version number string as specified in the resource bundle or its parent.
* The caller does not own this string.
* @see ures_getVersion
- * @stable ICU 2.0
+ * @deprecated ICU 2.8 Use ures_getVersion instead.
*/
U_CAPI const char* U_EXPORT2
ures_getVersionNumber(const UResourceBundle* resourceBundle);
@@ -267,12 +275,31 @@
* @param resourceBundle resource bundle in question
* @param status just for catching illegal arguments
* @return A Locale name
- * @stable ICU 2.0
+ * @deprecated ICU 2.8 Use ures_getLocaleByType instead.
*/
U_CAPI const char* U_EXPORT2
ures_getLocale(const UResourceBundle* resourceBundle,
UErrorCode* status);
+
+/**
+ * Return the name of the Locale associated with this ResourceBundle.
+ * You can choose between requested, valid and real locale.
+ *
+ * @param resourceBundle resource bundle in question
+ * @param type You can choose between requested, valid and actual
+ * locale. For description see the definition of
+ * ULocDataLocaleType in uloc.h
+ * @param status just for catching illegal arguments
+ * @return A Locale name
+ * @draft ICU 2.8
+ */
+U_CAPI const char* U_EXPORT2
+ures_getLocaleByType(const UResourceBundle* resourceBundle,
+ ULocDataLocaleType type,
+ UErrorCode* status);
+
+
/**
* Same as ures_open() but uses the fill-in parameter instead of allocating
* a bundle, if r!=NULL.
@@ -401,10 +428,9 @@
/**
* Returns the size of a resource. Size for scalar types is always 1,
* and for vector/table types is the number of child resources.
- * @warning Currently, this function works correctly for string, table and
- * array resources. For other types of resources, the result is
- * undefined. This is a bug and will be fixed.
- *
+ * @warning Integer array is treated as a scalar type. There are no
+ * APIs to access individual members of an integer array. It
+ * is always returned as a whole.
* @param resourceBundle a resource
* @return number of resources in a given resource.
* @stable ICU 2.0
Index: uscript.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uscript.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- uscript.h 10 Sep 2003 02:42:05 -0000 1.5
+++ uscript.h 6 Apr 2004 10:08:09 -0000 1.6
@@ -19,7 +19,7 @@
/**
* Constants for Unicode script values from ScriptNames.txt .
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
typedef enum UScriptCode {
USCRIPT_INVALID_CODE = -1,
@@ -65,7 +65,7 @@
USCRIPT_TIBETAN = 39, /* Tibt */
/** Canadian_Aboriginal script. @draft ICU 2.6 */
USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */
- /** Canadian_Aboriginal script (alias). @draft ICU 2.2 */
+ /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL,
USCRIPT_YI = 41, /* Yiii */
USCRIPT_TAGALOG = 42, /* Tglg */
Index: uset.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uset.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- uset.h 10 Sep 2003 02:42:05 -0000 1.1
+++ uset.h 6 Apr 2004 10:08:09 -0000 1.2
@@ -40,14 +40,14 @@
#endif
/**
- * Bitmask values to be passed to the UnicodeSet constructor or
- * applyPattern() taking an option parameter.
- * @draft
+ * Bitmask values to be passed to uset_openPatternOptions() or
+ * uset_applyPattern() taking an option parameter.
+ * @draft ICU 2.4
*/
enum {
/**
* Ignore white space within patterns unless quoted or escaped.
- * @draft
+ * @draft ICU 2.4
*/
USET_IGNORE_SPACE = 1,
@@ -55,7 +55,7 @@
* Enable case insensitive matching. E.g., "[ab]" with this flag
* will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will
* match all except 'a', 'A', 'b', and 'B'.
- * @draft
+ * @draft ICU 2.4
*/
USET_CASE_INSENSITIVE = 2,
@@ -155,6 +155,32 @@
uset_close(USet* set);
/**
+ * Modifies the set to represent the set specified by the given
+ * pattern. See the UnicodeSet class description for the syntax of
+ * the pattern language. See also the User Guide chapter about UnicodeSet.
+ * <em>Empties the set passed before applying the pattern.</em>
+ * @param set The set to which the pattern is to be applied.
+ * @param pattern A pointer to UChar string specifying what characters are in the set.
+ * The character at pattern[0] must be a '['.
+ * @param patternLength The length of the UChar string. -1 if NUL terminated.
+ * @param options A bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param status Returns an error if the pattern cannot be parsed.
+ * @return Upon successful parse, the value is
+ * the index of the character after the closing ']'
+ * of the parsed pattern.
+ * If the status code indicates failure, then the return value
+ * is the index of the error in the source.
+ *
+ * @draft ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uset_applyPattern(USet *set,
+ const UChar *pattern, int32_t patternLength,
+ uint32_t options,
+ UErrorCode *status);
+
+/**
* Returns a string representation of this set. If the result of
* calling this function is passed to a uset_openPattern(), it
* will produce another set that is equal to this one.
@@ -205,7 +231,7 @@
* @param set the object to which to add the character
* @param start the first character of the range to add, inclusive
* @param end the last character of the range to add, inclusive
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI void U_EXPORT2
uset_addRange(USet* set, UChar32 start, UChar32 end);
@@ -237,7 +263,7 @@
* @param set the object from which to remove the character range
* @param start the first character of the range to remove, inclusive
* @param end the last character of the range to remove, inclusive
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI void U_EXPORT2
uset_removeRange(USet* set, UChar32 start, UChar32 end);
@@ -299,7 +325,7 @@
* @param start the first character of the range to test, inclusive
* @param end the last character of the range to test, inclusive
* @return TRUE if set contains the range
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI UBool U_EXPORT2
uset_containsRange(const USet* set, UChar32 start, UChar32 end);
Index: usetiter.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/usetiter.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- usetiter.h 10 Sep 2003 02:42:05 -0000 1.1
+++ usetiter.h 6 Apr 2004 10:08:09 -0000 1.2
@@ -48,7 +48,7 @@
* }
* </pre>
* @author M. Davis
- * @draft ICU 2.2
+ * @draft ICU 2.4
*/
class U_COMMON_API UnicodeSetIterator : public UObject {
@@ -205,18 +205,18 @@
void reset();
/**
- * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * ICU "poor man's RTTI", returns a UClassID for this class.
*
- * @draft ICU 2.2
+ * @draft ICU 2.4
*/
- virtual inline UClassID getDynamicClassID() const;
+ static UClassID getStaticClassID();
/**
- * ICU "poor man's RTTI", returns a UClassID for this class.
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
*
- * @draft ICU 2.2
+ * @draft ICU 2.4
*/
- static inline UClassID getStaticClassID();
+ virtual UClassID getDynamicClassID() const;
// ======================= PRIVATES ===========================
@@ -270,22 +270,7 @@
*/
virtual void loadRange(int32_t range);
-private:
-
- /**
- * The address of this static class variable serves as this class's ID
- * for ICU "poor man's RTTI".
- */
- static const char fgClassID;
};
-
-inline UClassID
-UnicodeSetIterator::getStaticClassID()
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-UnicodeSetIterator::getDynamicClassID() const
-{ return UnicodeSetIterator::getStaticClassID(); }
inline UBool UnicodeSetIterator::isString() const {
return codepoint == (UChar32)IS_STRING;
Index: ustring.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/ustring.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- ustring.h 10 Sep 2003 02:42:05 -0000 1.4
+++ ustring.h 6 Apr 2004 10:08:09 -0000 1.5
@@ -453,7 +453,7 @@
*
* @return <0 or 0 or >0 as usual for string comparisons
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
u_strCompare(const UChar *s1, int32_t length1,
@@ -488,7 +488,7 @@
/**
* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
* Compare strings in code point order instead of code unit order.
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
#define U_COMPARE_CODE_POINT_ORDER 0x8000
#endif
@@ -531,7 +531,7 @@
*
* @return <0 or 0 or >0 as usual for string comparisons
*
- * @draft ICU 2.2
+ * @stable ICU 2.2
*/
U_CAPI int32_t U_EXPORT2
u_strCaseCompare(const UChar *s1, int32_t length1,
Index: utf.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/utf.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- utf.h 10 Sep 2003 02:42:05 -0000 1.4
+++ utf.h 6 Apr 2004 10:08:09 -0000 1.5
@@ -258,6 +258,22 @@
!U_IS_UNICODE_NONCHAR(c)))
/**
+ * Is this code point a BMP code point (U+0000..U+ffff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @draft ICU 2.8
+ */
+#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff)
+
+/**
+ * Is this code point a supplementary code point (U+10000..U+10ffff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @draft ICU 2.8
+ */
+#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff)
+
+/**
* Is this code point a lead surrogate (U+d800..U+dbff)?
* @param c 32-bit code point
* @return TRUE or FALSE
Index: utf16.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/utf16.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- utf16.h 10 Sep 2003 02:42:05 -0000 1.3
+++ utf16.h 6 Apr 2004 10:08:09 -0000 1.4
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1999-2001, International Business Machines
+* Copyright (C) 1999-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
Index: utf8.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/utf8.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- utf8.h 10 Sep 2003 02:42:05 -0000 1.4
+++ utf8.h 6 Apr 2004 10:08:09 -0000 1.5
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1999-2001, International Business Machines
+* Copyright (C) 1999-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -156,7 +156,7 @@
* byte sequence.
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param i string offset
* @param c output UChar32 variable
* @see U8_GET
@@ -178,7 +178,7 @@
* c is set to a negative value.
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param start starting string offset
* @param i string offset, start<=i<length
* @param length string length
@@ -205,7 +205,7 @@
* The result is undefined if the offset points to a trail byte
* or an illegal UTF-8 sequence.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param i string offset
* @param c output UChar32 variable
* @see U8_NEXT
@@ -241,7 +241,7 @@
* If the offset points to a trail byte or an illegal UTF-8 sequence, then
* c is set to a negative value.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param i string offset, i<length
* @param length string length
* @param c output UChar32 variable, set to <0 in case of an error
@@ -250,9 +250,9 @@
*/
#define U8_NEXT(s, i, length, c) { \
(c)=(s)[(i)++]; \
- if((c)>=0x80) { \
+ if(((uint8_t)(c))>=0x80) { \
if(U8_IS_LEAD(c)) { \
- (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, -1); \
+ (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (int32_t)(length), c, -1); \
} else { \
(c)=U_SENTINEL; \
} \
@@ -266,7 +266,7 @@
* "Unsafe" macro, assumes a valid code point and sufficient space in the string.
* Otherwise, the result is undefined.
*
- * @param s const UChar * string buffer
+ * @param s const uint8_t * string buffer
* @param i string offset
* @param c code point to append
* @see U8_APPEND
@@ -300,7 +300,7 @@
* If the code point is not valid or trail bytes do not fit,
* then isError is set to TRUE.
*
- * @param s const UChar * string buffer
+ * @param s const uint8_t * string buffer
* @param i string offset, i<length
* @param length size of the string buffer
* @param c code point to append
@@ -321,7 +321,7 @@
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param i string offset
* @see U8_FWD_1
* @draft ICU 2.4
@@ -335,7 +335,7 @@
* (Post-incrementing iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param i string offset, i<length
* @param length string length
* @see U8_FWD_1_UNSAFE
@@ -361,7 +361,7 @@
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param i string offset
* @param n number of code points to skip
* @see U8_FWD_N
@@ -381,7 +381,7 @@
* (Post-incrementing iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param i string offset, i<length
* @param length string length
* @param n number of code points to skip
@@ -404,7 +404,7 @@
* Otherwise, it is not modified.
* "Unsafe" macro, assumes well-formed UTF-8.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param i string offset
* @see U8_SET_CP_START
* @draft ICU 2.4
@@ -421,7 +421,7 @@
* Otherwise, it is not modified.
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param start starting string offset (usually 0)
* @param i string offset, start<=i
* @see U8_SET_CP_START_UNSAFE
@@ -448,7 +448,7 @@
* will be returned as the code point.
* The result is undefined if the offset is behind an illegal UTF-8 sequence.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param i string offset
* @param c output UChar32 variable
* @see U8_PREV
@@ -489,7 +489,7 @@
* will be returned as the code point.
* If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param start starting string offset (usually 0)
* @param i string offset, start<=i
* @param c output UChar32 variable, set to <0 in case of an error
@@ -513,7 +513,7 @@
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-8.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param i string offset
* @see U8_BACK_1
* @draft ICU 2.4
@@ -528,7 +528,7 @@
* The input offset may be the same as the string length.
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param start starting string offset (usually 0)
* @param i string offset, start<=i
* @see U8_BACK_1_UNSAFE
@@ -547,7 +547,7 @@
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-8.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param i string offset
* @param n number of code points to skip
* @see U8_BACK_N
@@ -568,7 +568,7 @@
* The input offset may be the same as the string length.
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param start index of the start of the string
* @param i string offset, i<length
* @param n number of code points to skip
@@ -591,7 +591,7 @@
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-8.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param i string offset
* @see U8_SET_CP_LIMIT
* @draft ICU 2.4
@@ -609,7 +609,7 @@
* The input offset may be the same as the string length.
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
- * @param s const UChar * string
+ * @param s const uint8_t * string
* @param start starting string offset (usually 0)
* @param i string offset, start<=i<=length
* @param length string length
Index: utypes.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/utypes.h,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -d -r1.9 -r1.10
--- utypes.h 10 Sep 2003 02:42:05 -0000 1.9
+++ utypes.h 6 Apr 2004 10:08:09 -0000 1.10
@@ -528,9 +528,9 @@
U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */
U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */
U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */
- U_INVALID_CHAR_FOUND = 10, /**< In the Character conversion routines: Invalid character or sequence was encountered. In other APIs: Invalid character or code point name. */
- U_TRUNCATED_CHAR_FOUND = 11, /**< In the Character conversion routines: More bytes are required to complete the conversion successfully */
- U_ILLEGAL_CHAR_FOUND = 12, /**< In codeset conversion: a sequence that does NOT belong in the codepage has been encountered */
+ U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */
+ U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */
+ U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units. */
U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */
U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */
U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */
@@ -546,6 +546,7 @@
It is very possible that a circular alias definition has occurred */
U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */
U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */
+ U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */
U_STANDARD_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for standard errors */
/*
@@ -624,6 +625,7 @@
U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */
U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
+ U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */
U_BRK_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for Break Iterator failures */
/*
@@ -650,16 +652,23 @@
* The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes
*/
U_IDNA_ERROR_START=0x10400,
- U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR,
- U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR,
+ U_IDNA_PROHIBITED_ERROR,
+ U_IDNA_UNASSIGNED_ERROR,
U_IDNA_CHECK_BIDI_ERROR,
U_IDNA_STD3_ASCII_RULES_ERROR,
U_IDNA_ACE_PREFIX_ERROR,
U_IDNA_VERIFICATION_ERROR,
U_IDNA_LABEL_TOO_LONG_ERROR,
U_IDNA_ERROR_LIMIT,
+ /*
+ * Aliases for StringPrep
+ */
+ U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
+ U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
+ U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
+
- U_ERROR_LIMIT=U_IDNA_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
+ U_ERROR_LIMIT=U_IDNA_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
} UErrorCode;
/* Use the following to determine if an UErrorCode represents */
Index: uversion.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uversion.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- uversion.h 10 Sep 2003 02:42:05 -0000 1.5
+++ uversion.h 6 Apr 2004 10:08:09 -0000 1.6
@@ -61,7 +61,7 @@
* This value will change in the subsequent releases of ICU
* @draft ICU 2.6
*/
-#define U_ICU_VERSION_MINOR_NUM 6
+#define U_ICU_VERSION_MINOR_NUM 8
/** The current ICU patchlevel version as an integer.
* This value will change in the subsequent releases of ICU
@@ -73,20 +73,20 @@
* This value will change in the subsequent releases of ICU
* @draft ICU 2.6
*/
-#define U_ICU_VERSION_SUFFIX _2_6
+#define U_ICU_VERSION_SUFFIX _2_8
/** The current ICU library version as a dotted-decimal string. The patchlevel
* only appears in this string if it is non-zero.
* This value will change in the subsequent releases of ICU
* @draft ICU 2.4
*/
-#define U_ICU_VERSION "2.6"
+#define U_ICU_VERSION "2.8"
/** The current ICU library major/minor version as a string without dots, for library name suffixes.
* This value will change in the subsequent releases of ICU
* @draft ICU 2.6
*/
-#define U_ICU_VERSION_SHORT "26"
+#define U_ICU_VERSION_SHORT "28"
/** An ICU version consists of up to 4 numbers from 0..255.
* @draft ICU 2.4
@@ -109,7 +109,7 @@
typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
#if U_HAVE_NAMESPACE && defined(XP_CPLUSPLUS)
-#define U_ICU_NAMESPACE icu_2_6
+#define U_ICU_NAMESPACE icu_2_8
namespace U_ICU_NAMESPACE { }
namespace icu = U_ICU_NAMESPACE;
U_NAMESPACE_USE
@@ -174,21 +174,23 @@
* compression, generating quad level always when strength is quad or more
* version 4 - ICU 2.2 - tracking UCA changes, ignore completely ignorables
* in contractions, ignore primary ignorables after shifted
+ * version 5 - ICU 2.8 - changed implicit generation code
* This value may change in the subsequent releases of ICU
* @draft ICU 2.4
*/
-#define UCOL_RUNTIME_VERSION 4
+#define UCOL_RUNTIME_VERSION 5
/** Builder code version. When this is different, same tailoring might result
* in assigning different collation elements to code points
* version 2 was in ICU 1.8.1. added support for prefixes, tweaked canonical
* closure. However, the tailorings should probably get same CEs assigned
* version 5 - ICU 2.2 - fixed some bugs, renamed some indirect values.
+ * version 6 - ICU 2.8 - fixed bug in builder that allowed 0xFF in primary values
* Backward compatible with the old rules.
* This value may change in the subsequent releases of ICU
* @draft ICU 2.4
*/
-#define UCOL_BUILDER_VERSION 5
+#define UCOL_BUILDER_VERSION 6
/** *** Removed *** Instead we use the data we read from FractionalUCA.txt
* This is the version of FractionalUCA.txt tailoring rules
- Previous message: [sword-cvs] icu-sword/source/layout AnchorTables.cpp,1.3,1.4 ArabicLayoutEngine.cpp,1.3,1.4 ArabicLayoutEngine.h,1.3,1.4 ArabicShaping.cpp,1.3,1.4 ClassDefinitionTables.cpp,1.3,1.4 ContextualGlyphSubstProc.h,1.3,1.4 ContextualSubstSubtables.cpp,1.3,1.4 ContextualSubstSubtables.h,1.3,1.4 DefaultCharMapper.h,1.3,1.4 GXLayoutEngine.cpp,1.3,1.4 GXLayoutEngine.h,1.3,1.4 GlyphIterator.cpp,1.3,1.4 GlyphIterator.h,1.3,1.4 GlyphPositioningTables.cpp,1.3,1.4 GlyphSubstitutionTables.cpp,1.3,1.4 GlyphSubstitutionTables.h,1.3,1.4 HanLayoutEngine.cpp,1.1,1.2 HanLayoutEngine.h,1.1,1.2 IndicLayoutEngine.cpp,1.3,1.4 IndicLayoutEngine.h,1.3,1.4 IndicRearrangementProcessor.cpp,1.3,1.4 IndicRearrangementProcessor.h,1.3,1.4 IndicReordering.cpp,1.3,1.4 IndicReordering.h,1.3,1.4 LEFontInstance.h,1.3,1.4 LEGlyphFilter.h,1.3,1.4 LELanguages.h,1.1,1.2 LEScripts.h,1.3,1.4 LESwaps.h,1.3,1.4 LETypes.h,1.3,1.4 LayoutEngine.cpp,1.3,1.4 LayoutEngine.h,1.3,1.4 LigatureSubstProc.cpp,1.3,1.4 LigatureSubstProc.h,1.3,1.4 LookupProcessor.cpp,1.3,1.4 LookupProcessor.h,1.3,1.4 MPreFixups.cpp,1.1,1.2 MPreFixups.h,1.1,1.2 Makefile.in,1.4,1.5 MarkToBasePosnSubtables.cpp,1.3,1.4 MarkToMarkPosnSubtables.cpp,1.3,1.4 MultipleSubstSubtables.cpp,1.3,1.4 OpenTypeLayoutEngine.cpp,1.4,1.5 OpenTypeLayoutEngine.h,1.3,1.4 ScriptAndLanguage.h,1.3,1.4 ScriptAndLanguageTags.cpp,1.3,1.4 ScriptAndLanguageTags.h,1.3,1.4 SegmentArrayProcessor.h,1.3,1.4 SegmentSingleProcessor.h,1.3,1.4 SimpleArrayProcessor.h,1.3,1.4 SingleTableProcessor.h,1.3,1.4 ThaiLayoutEngine.cpp,1.3,1.4 ThaiLayoutEngine.h,1.3,1.4 ThaiShaping.cpp,1.4,1.5 ThaiShaping.h,1.3,1.4 ThaiStateTables.cpp,1.3,1.4 TrimmedArrayProcessor.h,1.3,1.4 layout.dsp,1.4,1.5 layout.rc,1.3,1.4 layout.vcproj,1.1,1.2
- Next message: [sword-cvs] icu-sword/source/test/testdata conversion.txt,NONE,1.1 nfs4_cis_prep.txt,NONE,1.1 nfs4_cs_prep_ci.txt,NONE,1.1 nfs4_cs_prep_cs.txt,NONE,1.1 nfs4_mixed_prep_p.txt,NONE,1.1 nfs4_mixed_prep_s.txt,NONE,1.1 ra.txt,NONE,1.1 riwords.txt,NONE,1.1 test4x.ucm,NONE,1.1 CollationTest_NON_IGNORABLE_STUB.txt,1.1,1.2 CollationTest_SHIFTED_STUB.txt,1.1,1.2 DataDrivenCollationTest.txt,1.1,1.2 idna_rules.txt,1.1,1.2 rbbitst.txt,1.1,1.2 regextst.txt,1.1,1.2 te.txt,1.3,1.4 test1.ucm,1.3,1.4 test3.ucm,1.3,1.4 test4.ucm,1.3,1.4 testaliases.txt,1.1,1.2 testdata.mk,1.4,1.5 testtypes.txt,1.4,1.5 translit_rules.txt,1.1,1.2 th18057.txt,1.5,NONE
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]