[sword-cvs] icu-sword/source/common/unicode symtable.h,NONE,1.1 usprep.h,NONE,1.1 utrace.h,NONE,1.1 brkiter.h,1.1,1.2 caniter.h,1.1,1.2 chariter.h,1.4,1.5 dbbi.h,1.1,1.2 locid.h,1.4,1.5 normlzr.h,1.5,1.6 parsepos.h,1.1,1.2 platform.h.in,1.5,1.6 pos400.h,1.3,1.4 putil.h,1.4,1.5 pwin32.h,1.5,1.6 rbbi.h,1.1,1.2 rep.h,1.4,1.5 resbund.h,1.4,1.5 schriter.h,1.4,1.5 strenum.h,1.1,1.2 ubrk.h,1.1,1.2 uchar.h,1.5,1.6 uchriter.h,1.4,1.5 uclean.h,1.4,1.5 ucnv.h,1.4,1.5 ucnv_err.h,1.4,1.5 uenum.h,1.1,1.2 uidna.h,1.1,1.2 uiter.h,1.1,1.2 uloc.h,1.4,1.5 umachine.h,1.4,1.5 umisc.h,1.3,1.4 unifilt.h,1.1,1.2 unifunct.h,1.1,1.2 unimatch.h,1.1,1.2 uniset.h,1.1,1.2 unistr.h,1.5,1.6 unorm.h,1.4,1.5 uobject.h,1.1,1.2 urename.h,1.5,1.6 ures.h,1.5,1.6 uscript.h,1.5,1.6 uset.h,1.1,1.2 usetiter.h,1.1,1.2 ustring.h,1.4,1.5 utf.h,1.4,1.5 utf16.h,1.3,1.4 utf8.h,1.4,1.5 utypes.h,1.9,1.10 uversion.h,1.5,1.6

sword@www.crosswire.org sword@www.crosswire.org
Tue, 6 Apr 2004 03:11:19 -0700


Update of /cvs/core/icu-sword/source/common/unicode
In directory www:/tmp/cvs-serv8911/source/common/unicode

Modified Files:
	brkiter.h caniter.h chariter.h dbbi.h locid.h normlzr.h 
	parsepos.h platform.h.in pos400.h putil.h pwin32.h rbbi.h 
	rep.h resbund.h schriter.h strenum.h ubrk.h uchar.h uchriter.h 
	uclean.h ucnv.h ucnv_err.h uenum.h uidna.h uiter.h uloc.h 
	umachine.h umisc.h unifilt.h unifunct.h unimatch.h uniset.h 
	unistr.h unorm.h uobject.h urename.h ures.h uscript.h uset.h 
	usetiter.h ustring.h utf.h utf16.h utf8.h utypes.h uversion.h 
Added Files:
	symtable.h usprep.h utrace.h 
Log Message:
ICU 2.8 sync

--- NEW FILE: symtable.h ---
/*
**********************************************************************
*   Copyright (c) 2000-2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   02/04/00    aliu        Creation.
**********************************************************************
*/
#ifndef SYMTABLE_H
#define SYMTABLE_H

#include "unicode/utypes.h"
#include "unicode/uobject.h"

U_NAMESPACE_BEGIN

class ParsePosition;
class UnicodeFunctor;
class UnicodeSet;
class UnicodeString;

/**
 * An interface that defines both lookup protocol and parsing of
 * symbolic names.
 *
 * <p>A symbol table maintains two kinds of mappings.  The first is
 * between symbolic names and their values.  For example, if the
 * variable with the name "start" is set to the value "alpha"
 * (perhaps, though not necessarily, through an expression such as
 * "$start=alpha"), then the call lookup("start") will return a
 * pointer to a UnicodeString containing "alpha".
 *
 * <p>The second kind of mapping is between character values and
 * UnicodeMatcher objects.  This is used by RuleBasedTransliterator,
 * which uses characters in the private use area to represent objects
 * such as UnicodeSets.  If U+E015 is mapped to the UnicodeSet [a-z],
 * then lookupMatcher(0xE015) will return the UnicodeSet [a-z].
 *
 * <p>Finally, a symbol table defines parsing behavior for symbolic
 * names.  All symbolic names start with the SYMBOL_REF character.
 * When a parser encounters this character, it calls parseReference()
 * with the position immediately following the SYMBOL_REF.  The symbol
 * table parses the name, if there is one, and returns it.
 *
 * @draft ICU 2.8
 */
class U_COMMON_API SymbolTable /* not : public UObject because this is an interface/mixin class */ {
public:

    /**
     * The character preceding a symbol reference name
     * (U+0024, the dollar sign).
     */
    enum { SYMBOL_REF = 0x0024 /*$*/ };

    /**
     * Destructor.  Declared virtual because this class is an
     * interface that is implemented by subclasses.
     */
    virtual ~SymbolTable();

    /**
     * Look up the characters associated with the given symbolic name
     * and return them.  Return <tt>NULL</tt> if no such name exists.
     * The resultant string may have length zero.
     * @param s the symbolic name to look up
     * @return a string containing the name's value, or <tt>NULL</tt> if
     * there is no mapping for s.
     */
    virtual const UnicodeString* lookup(const UnicodeString& s) const = 0;

    /**
     * Look up the UnicodeMatcher associated with the given character, and
     * return it.  Return <tt>NULL</tt> if not found.  Note that the
     * declared return type is a pointer to UnicodeFunctor.
     * @param ch a 32-bit code point from 0 to 0x10FFFF inclusive.
     * @return the UnicodeMatcher object represented by the given
     * character, or NULL if there is no mapping for ch.
     */
    virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const = 0;

    /**
     * Parse a symbol reference name from the given string, starting
     * at the given position.  If no valid symbol reference name is
     * found, return the empty string and leave pos unchanged.  That is,
     * if the character at pos cannot start a name, or if pos is at or
     * after text.length(), then return an empty string.  An empty
     * result indicates an isolated SYMBOL_REF character.
     * @param text the text to parse for the name
     * @param pos on entry, the index of the first character to parse.
     * This is the character following the SYMBOL_REF character.  On
     * exit, the index after the last parsed character.  If the parse
     * failed, pos is unchanged on exit.
     * @param limit the index after the last character to be parsed.
     * @return the parsed name, or an empty string if there is no
     * valid symbolic name at the given position.
     */
    virtual UnicodeString parseReference(const UnicodeString& text,
                                         ParsePosition& pos, int32_t limit) const = 0;
};
U_NAMESPACE_END

#endif

--- NEW FILE: usprep.h ---
/*
 *******************************************************************************
 *
 *   Copyright (C) 2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  usprep.h
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2003jul2
 *   created by: Ram Viswanadha
 */

#ifndef __USPREP_H__
#define __USPREP_H__

#include "unicode/utypes.h"
/**
 *\file
 * StringPrep API implements the StringPrep framework as described by RFC 3454.
 * StringPrep prepares Unicode strings for use in network protocols.
 * Profiles of StringPrep are sets of rules and data according to which the
 * Unicode strings are prepared. Each profile contains tables which describe
 * how a code point should be treated. The tables are broadly classified into
 * <ul>
 *     <li> Unassigned Table: Contains code points that are unassigned 
 *          in the Unicode Version supported by StringPrep. Currently 
 *          RFC 3454 supports Unicode 3.2. </li>
 *     <li> Prohibited Table: Contains code points that are prohibited from
 *          the output of the StringPrep processing function. </li>
 *     <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
 * </ul>
 * 
 * The procedure for preparing Unicode strings:
 * <ol>
 *      <li> Map: For each character in the input, check if it has a mapping
 *           and, if so, replace it with its mapping. </li>
 *      <li> Normalize: Possibly normalize the result of step 1 using Unicode
 *           normalization. </li>
 *      <li> Prohibit: Check for any characters that are not allowed in the
 *        output.  If any are found, return an error.</li>
 *      <li> Check bidi: Possibly check for right-to-left characters, and if
 *           any are found, make sure that the whole string satisfies the
 *           requirements for bidirectional strings.  If the string does not
 *           satisfy the requirements for bidirectional strings, return an
 *           error.  </li>
 * </ol>
 * @author Ram Viswanadha
 */
#if !UCONFIG_NO_IDNA

#include "unicode/parseerr.h"
/**
 * The StringPrep profile
 * @draft ICU 2.8
 */
typedef struct UStringPrepProfile UStringPrepProfile;


/** 
 * Option to prohibit processing of unassigned code points in the input
 * 
 * @see  usprep_prepare
 * @draft ICU 2.8
 */
#define USPREP_DEFAULT 0x0000

/** 
 * Option to allow processing of unassigned code points in the input
 * 
 * @see  usprep_prepare
 * @draft ICU 2.8
 */
#define USPREP_ALLOW_UNASSIGNED 0x0001



/**
 * Creates a StringPrep profile from the data file.
 *
 * @param path      string containing the full path pointing to the directory
 *                  where the profile resides, followed by the package name,
 *                  e.g. "/usr/resource/my_app/profiles/mydata" on a Unix system.
 *                  If NULL, ICU default data files will be used.
 * @param fileName  name of the profile file to be opened
 * @param status    ICU error code in/out parameter. Must not be NULL.
 *                  Must fulfill U_SUCCESS before the function call.
 * @return Pointer to the UStringPrepProfile that is opened. Should be closed by
 *         calling usprep_close()
 * @see usprep_close()
 * @draft ICU 2.8
 */
U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char* path, 
            const char* fileName,
            UErrorCode* status);


/**
 * Closes the profile
 * @param profile The profile to close
 * @draft ICU 2.8
 */
U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile* profile);


/**
 * Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes (NFKC),
 * and checks for prohibited and BiDi characters in the order defined by RFC 3454,
 * depending on the options specified in the profile.
 *
 * @param prep          The profile to use 
 * @param src           Pointer to UChar buffer containing the string to prepare
 * @param srcLength     Number of characters in the source string
 * @param dest          Pointer to the destination buffer to receive the output
 * @param destCapacity  The capacity of destination array
 * @param options       A bit set of options:
 *
 *  - USPREP_DEFAULT            Prohibit processing of unassigned code points in the input
 *
 *  - USPREP_ALLOW_UNASSIGNED   Treat unassigned code points in the input 
 *                              as normal Unicode code points.
 *
 * @param parseError        Pointer to UParseError struct to receive information on position 
 *                          of error if an error is encountered. Can be NULL.
 * @param status            ICU in/out error code parameter.
 *                          U_INVALID_CHAR_FOUND if src contains
 *                          unmatched single surrogates.
 *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
 *                          too many code points.
 *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
 * @return The number of UChars in the destination buffer
 * @see USPREP_DEFAULT
 * @see USPREP_ALLOW_UNASSIGNED
 * @draft ICU 2.8
 */

U_CAPI int32_t U_EXPORT2
usprep_prepare(   const UStringPrepProfile* prep,
                  const UChar* src, int32_t srcLength, 
                  UChar* dest, int32_t destCapacity,
                  int32_t options,
                  UParseError* parseError,
                  UErrorCode* status );


#endif /* #if !UCONFIG_NO_IDNA */

#endif

--- NEW FILE: utrace.h ---
/*
*******************************************************************************
*
*   Copyright (C) 2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  utrace.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2003aug06
*   created by: Markus W. Scherer
*
*   Definitions for ICU tracing/logging.
*
*/

#ifndef __UTRACE_H__
#define __UTRACE_H__

#include <stdarg.h>
#include "unicode/utypes.h"

U_CDECL_BEGIN

/**
 * Trace severity levels.  Higher levels increase the verbosity of the trace output.
 * @see utrace_setLevel
 * @draft ICU 2.8
 */

typedef enum UTraceLevel {
    /** Disable all tracing  @draft ICU 2.8*/
    UTRACE_OFF=-1,
    /** Trace error conditions only  @draft ICU 2.8*/
    UTRACE_ERROR=0,
    /** Trace errors and warnings  @draft ICU 2.8*/
    UTRACE_WARNING=3,
    /** Trace opens and closes of ICU services  @draft ICU 2.8*/
    UTRACE_OPEN_CLOSE=5,
    /** Trace an intermediate number of ICU operations  @draft ICU 2.8*/
    UTRACE_INFO=7,
    /** Trace the maximum number of ICU operations  @draft ICU 2.8*/
    UTRACE_VERBOSE=9
} UTraceLevel;


/**
 * Setter for the trace level.
 * @param traceLevel A UTraceLevel value.
 * @draft ICU 2.8
 */
U_CAPI void U_EXPORT2
utrace_setLevel(int32_t traceLevel);

/**
 * Getter for the trace level.
 * @return The trace level, a UTraceLevel value.
 * @see utrace_setLevel
 * @draft ICU 2.8
 */
U_CAPI int32_t U_EXPORT2
utrace_getLevel(void);

/* Trace function pointers types  ----------------------------- */

/**
  *  Type signature for the trace function to be called when entering a function.
  *  @param context value supplied at the time the trace functions are set.
  *  @param fnNumber Enum value indicating the ICU function being entered.
  *  @draft ICU 2.8
  */
typedef void U_CALLCONV
UTraceEntry(const void *context, int32_t fnNumber);

/**
  *  Type signature for the trace function to be called when exiting from a function.
  *  @param context value supplied at the time the trace functions are set.
  *  @param fnNumber Enum value indicating the ICU function being exited.
  *  @param fmt     A formatting string that describes the number and types
  *                 of arguments included with the variable args.  The fmt
  *                 string has the same form as the utrace_vformat format
  *                 string.
  *  @param args    A variable arguments list.  Contents are described by
  *                 the fmt parameter.
  *  @see   utrace_vformat
  *  @draft ICU 2.8
  */
typedef void U_CALLCONV
UTraceExit(const void *context, int32_t fnNumber, 
           const char *fmt, va_list args);

/**
  *  Type signature for the trace function to be called from within an ICU function
  *  to display data or messages.
  *  @param context  value supplied at the time the trace functions are set.
  *  @param fnNumber Enum value indicating the ICU function from within which
  *                  the data is being traced.
  *  @param level    The current tracing level
  *  @param fmt      A format string describing the tracing data that is supplied
  *                  as variable args
  *  @param args     The data being traced, passed as variable args.
  *  @draft ICU 2.8
  */
typedef void U_CALLCONV
UTraceData(const void *context, int32_t fnNumber, int32_t level,
           const char *fmt, va_list args);

/**
  *  Set ICU Tracing functions.  Installs application-provided tracing
  *  functions into ICU.  After doing this, subsequent ICU operations
  *  will call back to the installed functions, providing a trace
  *  of the use of ICU.  Passing a NULL pointer for a tracing function
  *  is allowed, and inhibits tracing action at points where that function
  *  would be called.
  *  <p>
  *  Tracing and Threads:  Tracing functions are global to a process, and
  *  will be called in response to ICU operations performed by any
  *  thread.  If tracing of an individual thread is desired, the
  *  tracing functions must themselves filter by checking that the
  *  current thread is the desired thread.
  *
  *  @param context an uninterpreted pointer.  Whatever is passed in
  *                 here will in turn be passed to each of the tracing
  *                 functions UTraceEntry, UTraceExit and UTraceData.
  *                 ICU does not use or alter this pointer.
  *  @param e       Callback function to be called on entry to
  *                 a traced ICU function.
  *  @param x       Callback function to be called on exit from a
  *                 traced ICU function.
  *  @param d       Callback function to be called from within a 
  *                 traced ICU function, for the purpose of providing
  *                 data to the trace.
  *
  *  @draft ICU 2.8
  */
U_CAPI void U_EXPORT2
utrace_setFunctions(const void *context,
                    UTraceEntry *e, UTraceExit *x, UTraceData *d);

/**
  * Get the currently installed ICU tracing functions.   Note that a null function
  *   pointer will be returned if no trace function has been set.
  *   All four parameters are pointer-to-pointer arguments through which
  *   the values are returned.
  *
  * @param context  Receives the currently installed tracing context.
  * @param e        Receives the currently installed UTraceEntry function.
  * @param x        Receives the currently installed UTraceExit function.
  * @param d        Receives the currently installed UTraceData function.
  * @see utrace_setFunctions
  * @draft ICU 2.8
  */
U_CAPI void U_EXPORT2
utrace_getFunctions(const void **context,
                    UTraceEntry **e, UTraceExit **x, UTraceData **d);



/*
 *
 * ICU trace format string syntax
 *
 * Format Strings are passed to UTraceData functions, and define the
 * number and types of the trace data being passed on each call.
 *
 * The UTraceData function, which is supplied by the application,
 * not by ICU, can either forward the trace data (passed via
 * varargs) and the format string back to ICU for formatting into
 * a displayable string, or it can interpret the format itself,
 * and do as it wishes with the trace data.
 *
 *
 * Goals for the format string
 * - basic data output
 * - easy to use for trace programmer
 * - sufficient provision for data types for trace output readability
 * - well-defined types and binary portable APIs
 *
 * Non-goals
 * - printf compatibility
 * - fancy formatting
 * - argument reordering and other internationalization features
 *
 * ICU trace format strings contain plain text with argument inserts,
 * much like standard printf format strings.
 * Each insert begins with a '%', then optionally contains a 'v',
 * then exactly one type character.
 * Two '%' in a row represent a '%' instead of an insert.
 * The trace format strings need not have \n at the end.
 *
 *
 * Types
 * -----
 *
 * Type characters:
 * - c A char character in the default codepage.
 * - s A NUL-terminated char * string in the default codepage.
 * - S A UChar * string.  Requires two params, (ptr, length).  Length=-1 for nul term.
 * - b A byte (8-bit integer).
 * - h A 16-bit integer.  Also a 16 bit Unicode code unit.
 * - d A 32-bit integer.  Also a 21-bit Unicode code point value.
 * - l A 64-bit integer.
 * - p A data pointer.
 *
 * Vectors
 * -------
 *
 * If the 'v' is not specified, then one item of the specified type
 * is passed in.
 * If the 'v' (for "vector") is specified, then a vector of items of the
 * specified type is passed in, via a pointer to the first item
 * and an int32_t value for the length of the vector.
 * Length==-1 means zero or NUL termination.  Works for vectors of all types.
 *
 * Note:  %vS is a vector of (UChar *) strings.  The strings must
 *        be nul terminated as there is no way to provide a
 *        separate length parameter for each string.  The length
 *        parameter (required for all vectors) is the number of
 *        strings, not the length of the strings.
 *
 * Examples
 * --------
 *
 * These examples show the parameters that will be passed to an application's
 *   UTraceData() function for various formats.
 *
 * - the precise formatting is up to the application!
 * - the examples use type casts for arguments only to _show_ the types of
 *   arguments without needing variable declarations in the examples;
 *   the type casts will not be necessary in actual code
 *
 * UTraceDataFunc(context, fnNumber, level,
 *              "There is a character %c in the string %s.",   // Format String 
 *              (char)c, (const char *)s);                     // varargs parameters
 * ->   There is a character 0x42 'B' in the string "Bravo".
 *
 * UTraceDataFunc(context, fnNumber, level,
 *              "Vector of bytes %vb vector of chars %vc",
 *              (const uint8_t *)bytes, (int32_t)bytesLength,
 *              (const char *)chars, (int32_t)charsLength);
 * ->  Vector of bytes
 *      42 63 64 3f [4]
 *     vector of chars
 *      "Bcd?"[4]
 *
 * UTraceDataFunc(context, fnNumber, level,
 *              "An int32_t %d and a whole bunch of them %vd",
 *              (int32_t)-5, (const int32_t *)ints, (int32_t)intsLength);
 * ->   An int32_t 0xfffffffb and a whole bunch of them
 *      fffffffb 00000005 0000010a [3]
 *
 */



/**
  *  Trace output Formatter.  An application's UTraceData tracing functions may call
  *                 back to this function to format the trace output in a
  *                 human readable form.  Note that a UTraceData function may choose
  *                 to not format the data;  it could, for example, save it
  *                 in the raw form it was received (more compact), leaving
  *                 formatting for a later trace analysis tool.
  *  @param outBuf  pointer to a buffer to receive the formatted output.  Output
  *                 will be nul terminated if there is space in the buffer -
  *                 if the length of the requested output < the output buffer size.
  *  @param capacity  Length of the output buffer.
  *  @param indent  Number of spaces to indent the output.  Intended to allow
  *                 data displayed from nested functions to be indented for readability.
  *  @param fmt     Format specification for the data to output
  *  @param args    Data to be formatted.
  *  @return        Length of formatted output, including the terminating NUL.
  *                 If buffer capacity is insufficient, the required capacity is returned. 
  *  @draft ICU 2.8
  */
U_CAPI int32_t U_EXPORT2
utrace_vformat(char *outBuf, int32_t capacity,
              int32_t indent, const char *fmt,  va_list args);

/**
  *  Trace output Formatter.  An application's UTraceData tracing functions may call
  *                 this function to format any additional trace data, beyond that
  *                 provided by default, in human readable form with the same
  *                 formatting conventions used by utrace_vformat().
  *  @param outBuf  pointer to a buffer to receive the formatted output.  Output
  *                 will be nul terminated if there is space in the buffer -
  *                 if the length of the requested output < the output buffer size.
  *  @param capacity  Length of the output buffer.
  *  @param indent  Number of spaces to indent the output.  Intended to allow
  *                 data displayed from nested functions to be indented for readability.
  *  @param fmt     Format specification for the data to output
  *  @param ...     Data to be formatted.
  *  @return        Length of formatted output, including the terminating NUL.
  *                 If buffer capacity is insufficient, the required capacity is returned. 
  *  @draft ICU 2.8
  */
U_CAPI int32_t U_EXPORT2
utrace_format(char *outBuf, int32_t capacity,
              int32_t indent, const char *fmt,  ...);



/* Trace function numbers --------------------------------------------------- */

/**
 * Get the name of a function from its trace function number.
 *
 * @param fnNumber The trace number for an ICU function.
 * @return The name string for the function.
 *
 * @see UTraceFunctionNumber
 * @draft ICU 2.8
 */
U_CAPI const char * U_EXPORT2
utrace_functionName(int32_t fnNumber);

/**
 *  These are the ICU functions that will be traced when tracing is enabled.
 *  The numbers are organized in groups.  Within each group, the *_START
 *  constant has the same value as the group's first function number, and
 *  the *_LIMIT constant immediately follows the group's last function number.
 *  @see utrace_functionName
 *  @draft ICU 2.8
 */
typedef enum UTraceFunctionNumber {
    /* General group, starting at 0. */
    UTRACE_FUNCTION_START=0,
    UTRACE_U_INIT=UTRACE_FUNCTION_START,
    UTRACE_U_CLEANUP,
    UTRACE_FUNCTION_LIMIT,

    /* Conversion (ucnv) group, starting at 0x1000. */
    UTRACE_CONVERSION_START=0x1000,
    UTRACE_UCNV_OPEN=UTRACE_CONVERSION_START,
    UTRACE_UCNV_OPEN_PACKAGE,
    UTRACE_UCNV_OPEN_ALGORITHMIC,
    UTRACE_UCNV_CLONE,
    UTRACE_UCNV_CLOSE,
    UTRACE_UCNV_FLUSH_CACHE,
    UTRACE_UCNV_LOAD,
    UTRACE_UCNV_UNLOAD,
    UTRACE_CONVERSION_LIMIT,

    /* Collation (ucol) group, starting at 0x2000. */
    UTRACE_COLLATION_START=0x2000,
    UTRACE_UCOL_OPEN=UTRACE_COLLATION_START,
    UTRACE_UCOL_CLOSE,
    UTRACE_UCOL_STRCOLL,
    UTRACE_UCOL_GET_SORTKEY,
    UTRACE_UCOL_GETLOCALE,
    UTRACE_UCOL_NEXTSORTKEYPART,
    UTRACE_UCOL_STRCOLLITER,
    UTRACE_COLLATION_LIMIT
} UTraceFunctionNumber;

U_CDECL_END

#endif

Index: brkiter.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/brkiter.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- brkiter.h	10 Sep 2003 02:42:04 -0000	1.1
+++ brkiter.h	6 Apr 2004 10:08:07 -0000	1.2
@@ -270,7 +270,11 @@
      * boundaries have been returned.
      * @stable ICU 2.0
      */
+#ifdef U_CYGWIN
+    static U_COMMON_API const int32_t DONE;
+#else
     static const int32_t DONE;
+#endif
 
     /**
      * Return the index of the first character in the text being scanned.
@@ -527,7 +531,7 @@
 
     /**
      * Register a new break iterator of the indicated kind, to use in the given locale.
-     * The break iterator will be adoped.  Clones of the iterator will be returned
+     * The break iterator will be adopted.  Clones of the iterator will be returned
      * if a request for a break iterator of the given kind matches or falls back to
      * this locale.
      * @param toAdopt the BreakIterator instance to be adopted
@@ -558,6 +562,21 @@
      */
     static StringEnumeration* getAvailableLocales(void);
 
+    /**
+     * Returns the locale for this break iterator. Two flavors are available: valid and 
+     * actual locale. 
+     * @draft ICU 2.8 likely to change in ICU 3.0, based on feedback
+     */
+    Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
+
+    /** Get the locale for this break iterator object. You can choose between valid and actual locale.
+     *  @param type type of the locale we're looking for (valid or actual) 
+     *  @param status error code for the operation
+     *  @return the locale
+     *  @internal
+     */
+    const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
+
  private:
     static BreakIterator* makeCharacterInstance(const Locale& loc, UErrorCode& status);
     static BreakIterator* makeWordInstance(const Locale& loc, UErrorCode& status);
@@ -578,7 +597,13 @@
     UBool fBufferClone;
     /** @internal */
     BreakIterator (const BreakIterator &other) : UObject(other), fBufferClone(FALSE) {}
+
 private:
+
+    /** @internal */
+    char actualLocale[ULOC_FULLNAME_CAPACITY];
+    char validLocale[ULOC_FULLNAME_CAPACITY];
+
     /**
      * The assignment operator has no real implementation.
      * It's provided to make the compiler happy. Do not call.

Index: caniter.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/caniter.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- caniter.h	10 Sep 2003 02:42:04 -0000	1.1
+++ caniter.h	6 Apr 2004 10:08:07 -0000	1.2
@@ -121,18 +121,18 @@
     static void permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);     
     
     /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     * ICU "poor man's RTTI", returns a UClassID for this class.
      *
-     * @draft ICU 2.2
+     * @stable ICU 2.2
      */
-    virtual inline UClassID getDynamicClassID() const;
+    static UClassID getStaticClassID();
 
     /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
      *
-     * @draft ICU 2.2
+     * @stable ICU 2.2
      */
-    static inline UClassID getStaticClassID();
+    virtual UClassID getDynamicClassID() const;
 
 private:
     // ===================== PRIVATES ==============================
@@ -187,20 +187,7 @@
 
     void cleanPieces();
 
-    /**
-     * The address of this static class variable serves as this class's ID
-     * for ICU "poor man's RTTI".
-     */
-    static const char fgClassID;
 };
-
-inline UClassID
-CanonicalIterator::getStaticClassID()
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-CanonicalIterator::getDynamicClassID() const 
-{ return CanonicalIterator::getStaticClassID(); }
 
 U_NAMESPACE_END
 

Index: chariter.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/chariter.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- chariter.h	10 Sep 2003 02:42:04 -0000	1.4
+++ chariter.h	6 Apr 2004 10:08:07 -0000	1.5
@@ -1,7 +1,7 @@
 /*
 ********************************************************************
 *
-*   Copyright (C) 1997-2002, International Business Machines
+*   Copyright (C) 1997-2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ********************************************************************
@@ -84,100 +84,100 @@
  */
 class U_COMMON_API ForwardCharacterIterator : public UObject {
 public:
-  /**
-   * Value returned by most of ForwardCharacterIterator's functions
-   * when the iterator has reached the limits of its iteration.
-   * @stable ICU 2.0
-   */
-  enum { DONE = 0xffff };
-
-  /**
-   * Destructor.  
-   * @stable ICU 2.0
-   */
-  virtual ~ForwardCharacterIterator() {}
-
-  /**
-   * Returns true when both iterators refer to the same
-   * character in the same character-storage object.  
-   * @param that The ForwardCharacterIterator to be compared for equality
-   * @return true when both iterators refer to the same
-   * character in the same character-storage object
-   * @stable ICU 2.0
-   */
-  virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;
-        
-  /**
-   * Returns true when the iterators refer to different
-   * text-storage objects, or to different characters in the
-   * same text-storage object.  
-   * @param that The ForwardCharacterIterator to be compared for inequality
-   * @Returns true when the iterators refer to different
-   * text-storage objects, or to different characters in the
-   * same text-storage object
-   * @stable ICU 2.0
-   */
-  inline UBool operator!=(const ForwardCharacterIterator& that) const;
-
-  /**
-   * Generates a hash code for this iterator.  
-   * @return the hash code.
-   * @stable ICU 2.0
-   */
-  virtual int32_t hashCode(void) const = 0;
-        
-  /**
-   * Returns a UClassID for this ForwardCharacterIterator ("poor man's
-   * RTTI").<P> Despite the fact that this function is public,
-   * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API! 
-   * @Returns a UClassID for this ForwardCharacterIterator 
-   * @stable ICU 2.0
-   */
-  virtual UClassID getDynamicClassID(void) const = 0;
-
-  /**
-   * Gets the current code unit for returning and advances to the next code unit
-   * in the iteration range
-   * (toward endIndex()).  If there are
-   * no more code units to return, returns DONE.
-   * @return the current code unit.
-   * @stable ICU 2.0
-   */
-  virtual UChar         nextPostInc(void) = 0;
-        
-  /**
-   * Gets the current code point for returning and advances to the next code point
-   * in the iteration range
-   * (toward endIndex()).  If there are
-   * no more code points to return, returns DONE.
-   * @return the current code point.
-   * @stable ICU 2.0
-   */
-  virtual UChar32       next32PostInc(void) = 0;
-        
-  /**
-   * Returns FALSE if there are no more code units or code points
-   * at or after the current position in the iteration range.
-   * This is used with nextPostInc() or next32PostInc() in forward
-   * iteration.
-   * @returns FALSE if there are no more code units or code points
-   * at or after the current position in the iteration range.
-   * @stable ICU 2.0
-   */
-  virtual UBool        hasNext() = 0;
-
+    /**
+     * Value returned by most of ForwardCharacterIterator's functions
+     * when the iterator has reached the limits of its iteration.
+     * @stable ICU 2.0
+     */
+    enum { DONE = 0xffff };
+    
+    /**
+     * Destructor.  
+     * @stable ICU 2.0
+     */
+    virtual ~ForwardCharacterIterator();
+    
+    /**
+     * Returns true when both iterators refer to the same
+     * character in the same character-storage object.  
+     * @param that The ForwardCharacterIterator to be compared for equality
+     * @return true when both iterators refer to the same
+     * character in the same character-storage object
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;
+    
+    /**
+     * Returns true when the iterators refer to different
+     * text-storage objects, or to different characters in the
+     * same text-storage object.  
+     * @param that The ForwardCharacterIterator to be compared for inequality
+     * @Returns true when the iterators refer to different
+     * text-storage objects, or to different characters in the
+     * same text-storage object
+     * @stable ICU 2.0
+     */
+    inline UBool operator!=(const ForwardCharacterIterator& that) const;
+    
+    /**
+     * Generates a hash code for this iterator.  
+     * @return the hash code.
+     * @stable ICU 2.0
+     */
+    virtual int32_t hashCode(void) const = 0;
+    
+    /**
+     * Returns a UClassID for this ForwardCharacterIterator ("poor man's
+     * RTTI").<P> Despite the fact that this function is public,
+     * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API! 
+     * @Returns a UClassID for this ForwardCharacterIterator 
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const = 0;
+    
+    /**
+     * Gets the current code unit for returning and advances to the next code unit
+     * in the iteration range
+     * (toward endIndex()).  If there are
+     * no more code units to return, returns DONE.
+     * @return the current code unit.
+     * @stable ICU 2.0
+     */
+    virtual UChar         nextPostInc(void) = 0;
+    
+    /**
+     * Gets the current code point for returning and advances to the next code point
+     * in the iteration range
+     * (toward endIndex()).  If there are
+     * no more code points to return, returns DONE.
+     * @return the current code point.
+     * @stable ICU 2.0
+     */
+    virtual UChar32       next32PostInc(void) = 0;
+    
+    /**
+     * Returns FALSE if there are no more code units or code points
+     * at or after the current position in the iteration range.
+     * This is used with nextPostInc() or next32PostInc() in forward
+     * iteration.
+     * @returns FALSE if there are no more code units or code points
+     * at or after the current position in the iteration range.
+     * @stable ICU 2.0
+     */
+    virtual UBool        hasNext() = 0;
+    
 protected:
-  /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/
-  ForwardCharacterIterator() : UObject() {}
-  
-  /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/
-  ForwardCharacterIterator(const ForwardCharacterIterator &other) : UObject(other) {}
-  
-  /**
-   * Assignment operator to be overridden in the implementing class.
-   * @stable ICU 2.0
-   */
-  ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; }
+    /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/
+    ForwardCharacterIterator() : UObject() {}
+    
+    /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/
+    ForwardCharacterIterator(const ForwardCharacterIterator &other) : UObject(other) {}
+    
+    /**
+     * Assignment operator to be overridden in the implementing class.
+     * @stable ICU 2.0
+     */
+    ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; }
 };
 
 /**
@@ -351,361 +351,361 @@
  */
 class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
 public:
-  /**
-   * Origin enumeration for the move() and move32() functions.
-   * @stable ICU 2.0
-   */
-  enum EOrigin { kStart, kCurrent, kEnd };
+    /**
+     * Origin enumeration for the move() and move32() functions.
+     * @stable ICU 2.0
+     */
+    enum EOrigin { kStart, kCurrent, kEnd };
 
-  /**
-   * Returns a pointer to a new CharacterIterator of the same
-   * concrete class as this one, and referring to the same
-   * character in the same text-storage object as this one.  The
-   * caller is responsible for deleting the new clone.  
-   * @return a pointer to a new CharacterIterator
-   * @stable ICU 2.0
-   */
-  virtual CharacterIterator* clone(void) const = 0;
+    /**
+     * Returns a pointer to a new CharacterIterator of the same
+     * concrete class as this one, and referring to the same
+     * character in the same text-storage object as this one.  The
+     * caller is responsible for deleting the new clone.  
+     * @return a pointer to a new CharacterIterator
+     * @stable ICU 2.0
+     */
+    virtual CharacterIterator* clone(void) const = 0;
 
-  /**
-   * Sets the iterator to refer to the first code unit in its
-   * iteration range, and returns that code unit.
-   * This can be used to begin an iteration with next().
-   * @return the first code unit in its iteration range.
-   * @stable ICU 2.0
-   */
-  virtual UChar         first(void) = 0;
+    /**
+     * Sets the iterator to refer to the first code unit in its
+     * iteration range, and returns that code unit.
+     * This can be used to begin an iteration with next().
+     * @return the first code unit in its iteration range.
+     * @stable ICU 2.0
+     */
+    virtual UChar         first(void) = 0;
 
-  /**
-   * Sets the iterator to refer to the first code unit in its
-   * iteration range, returns that code unit, and moves the position
-   * to the second code unit. This is an alternative to setToStart()
-   * for forward iteration with nextPostInc().
-   * @return the first code unit in its iteration range.
-   * @stable ICU 2.0
-   */
-  virtual UChar         firstPostInc(void);
+    /**
+     * Sets the iterator to refer to the first code unit in its
+     * iteration range, returns that code unit, and moves the position
+     * to the second code unit. This is an alternative to setToStart()
+     * for forward iteration with nextPostInc().
+     * @return the first code unit in its iteration range.
+     * @stable ICU 2.0
+     */
+    virtual UChar         firstPostInc(void);
 
-  /**
-   * Sets the iterator to refer to the first code point in its
-   * iteration range, and returns that code unit,
-   * This can be used to begin an iteration with next32().
-   * Note that an iteration with next32PostInc(), beginning with,
-   * e.g., setToStart() or firstPostInc(), is more efficient.
-   * @return the first code point in its iteration range.
-   * @stable ICU 2.0
-   */
-  virtual UChar32       first32(void) = 0;
+    /**
+     * Sets the iterator to refer to the first code point in its
+     * iteration range, and returns that code point.
+     * This can be used to begin an iteration with next32().
+     * Note that an iteration with next32PostInc(), beginning with,
+     * e.g., setToStart() or firstPostInc(), is more efficient.
+     * @return the first code point in its iteration range.
+     * @stable ICU 2.0
+     */
+    virtual UChar32       first32(void) = 0;
 
-  /**
-   * Sets the iterator to refer to the first code point in its
-   * iteration range, returns that code point, and moves the position
-   * to the second code point. This is an alternative to setToStart()
-   * for forward iteration with next32PostInc().
-   * @return the first code point in its iteration range.
-   * @stable ICU 2.0
-   */
-  virtual UChar32       first32PostInc(void);
+    /**
+     * Sets the iterator to refer to the first code point in its
+     * iteration range, returns that code point, and moves the position
+     * to the second code point. This is an alternative to setToStart()
+     * for forward iteration with next32PostInc().
+     * @return the first code point in its iteration range.
+     * @stable ICU 2.0
+     */
+    virtual UChar32       first32PostInc(void);
 
-  /**
-   * Sets the iterator to refer to the first code unit or code point in its
-   * iteration range. This can be used to begin a forward
-   * iteration with nextPostInc() or next32PostInc().
-   * @return the start position of the iteration range
-   * @stable ICU 2.0
-   */
-  inline int32_t    setToStart();
+    /**
+     * Sets the iterator to refer to the first code unit or code point in its
+     * iteration range. This can be used to begin a forward
+     * iteration with nextPostInc() or next32PostInc().
+     * @return the start position of the iteration range
+     * @stable ICU 2.0
+     */
+    inline int32_t    setToStart();
 
-  /**
-   * Sets the iterator to refer to the last code unit in its
-   * iteration range, and returns that code unit.
-   * This can be used to begin an iteration with previous().
-   * @return the last code unit.
-   * @stable ICU 2.0
-   */
-  virtual UChar         last(void) = 0;
+    /**
+     * Sets the iterator to refer to the last code unit in its
+     * iteration range, and returns that code unit.
+     * This can be used to begin an iteration with previous().
+     * @return the last code unit.
+     * @stable ICU 2.0
+     */
+    virtual UChar         last(void) = 0;
         
-  /**
-   * Sets the iterator to refer to the last code point in its
-   * iteration range, and returns that code unit.
-   * This can be used to begin an iteration with previous32().
-   * @return the last code point.
-   * @stable ICU 2.0
-   */
-  virtual UChar32       last32(void) = 0;
+    /**
+     * Sets the iterator to refer to the last code point in its
+     * iteration range, and returns that code point.
+     * This can be used to begin an iteration with previous32().
+     * @return the last code point.
+     * @stable ICU 2.0
+     */
+    virtual UChar32       last32(void) = 0;
 
-  /**
-   * Sets the iterator to the end of its iteration range, just behind
-   * the last code unit or code point. This can be used to begin a backward
-   * iteration with previous() or previous32().
-   * @return the end position of the iteration range
-   * @stable ICU 2.0
-   */
-  inline int32_t    setToEnd();
+    /**
+     * Sets the iterator to the end of its iteration range, just behind
+     * the last code unit or code point. This can be used to begin a backward
+     * iteration with previous() or previous32().
+     * @return the end position of the iteration range
+     * @stable ICU 2.0
+     */
+    inline int32_t    setToEnd();
 
-  /**
-   * Sets the iterator to refer to the "position"-th code unit
-   * in the text-storage object the iterator refers to, and
-   * returns that code unit.  
-   * @param position the "position"-th code unit in the text-storage object
-   * @return the "position"-th code unit.
-   * @stable ICU 2.0
-   */
-  virtual UChar         setIndex(int32_t position) = 0;
+    /**
+     * Sets the iterator to refer to the "position"-th code unit
+     * in the text-storage object the iterator refers to, and
+     * returns that code unit.  
+     * @param position the "position"-th code unit in the text-storage object
+     * @return the "position"-th code unit.
+     * @stable ICU 2.0
+     */
+    virtual UChar         setIndex(int32_t position) = 0;
 
-  /**
-   * Sets the iterator to refer to the beginning of the code point
-   * that contains the "position"-th code unit
-   * in the text-storage object the iterator refers to, and
-   * returns that code point.
-   * The current position is adjusted to the beginning of the code point
-   * (its first code unit).
-   * @param position the "position"-th code unit in the text-storage object
-   * @return the "position"-th code point.
-   * @stable ICU 2.0
-   */
-  virtual UChar32       setIndex32(int32_t position) = 0;
+    /**
+     * Sets the iterator to refer to the beginning of the code point
+     * that contains the "position"-th code unit
+     * in the text-storage object the iterator refers to, and
+     * returns that code point.
+     * The current position is adjusted to the beginning of the code point
+     * (its first code unit).
+     * @param position the "position"-th code unit in the text-storage object
+     * @return the "position"-th code point.
+     * @stable ICU 2.0
+     */
+    virtual UChar32       setIndex32(int32_t position) = 0;
 
-  /**
-   * Returns the code unit the iterator currently refers to. 
-   * @return the current code unit. 
-   * @stable ICU 2.0
-   */
-  virtual UChar         current(void) const = 0;
+    /**
+     * Returns the code unit the iterator currently refers to. 
+     * @return the current code unit. 
+     * @stable ICU 2.0
+     */
+    virtual UChar         current(void) const = 0;
         
-  /**
-   * Returns the code point the iterator currently refers to.  
-   * @return the current code point.
-   * @stable ICU 2.0
-   */
-  virtual UChar32       current32(void) const = 0;
+    /**
+     * Returns the code point the iterator currently refers to.  
+     * @return the current code point.
+     * @stable ICU 2.0
+     */
+    virtual UChar32       current32(void) const = 0;
         
-  /**
-   * Advances to the next code unit in the iteration range
-   * (toward endIndex()), and returns that code unit.  If there are
-   * no more code units to return, returns DONE.
-   * @return the next code unit.
-   * @stable ICU 2.0
-   */
-  virtual UChar         next(void) = 0;
+    /**
+     * Advances to the next code unit in the iteration range
+     * (toward endIndex()), and returns that code unit.  If there are
+     * no more code units to return, returns DONE.
+     * @return the next code unit.
+     * @stable ICU 2.0
+     */
+    virtual UChar         next(void) = 0;
         
-  /**
-   * Advances to the next code point in the iteration range
-   * (toward endIndex()), and returns that code point.  If there are
-   * no more code points to return, returns DONE.
-   * Note that iteration with "pre-increment" semantics is less
-   * efficient than iteration with "post-increment" semantics
-   * that is provided by next32PostInc().
-   * @return the next code point.
-   * @stable ICU 2.0
-   */
-  virtual UChar32       next32(void) = 0;
+    /**
+     * Advances to the next code point in the iteration range
+     * (toward endIndex()), and returns that code point.  If there are
+     * no more code points to return, returns DONE.
+     * Note that iteration with "pre-increment" semantics is less
+     * efficient than iteration with "post-increment" semantics
+     * that is provided by next32PostInc().
+     * @return the next code point.
+     * @stable ICU 2.0
+     */
+    virtual UChar32       next32(void) = 0;
         
-  /**
-   * Advances to the previous code unit in the iteration range
-   * (toward startIndex()), and returns that code unit.  If there are
-   * no more code units to return, returns DONE.  
-   * @return the previous code unit.
-   * @stable ICU 2.0
-   */
-  virtual UChar         previous(void) = 0;
+    /**
+     * Advances to the previous code unit in the iteration range
+     * (toward startIndex()), and returns that code unit.  If there are
+     * no more code units to return, returns DONE.  
+     * @return the previous code unit.
+     * @stable ICU 2.0
+     */
+    virtual UChar         previous(void) = 0;
 
-  /**
-   * Advances to the previous code point in the iteration range
-   * (toward startIndex()), and returns that code point.  If there are
-   * no more code points to return, returns DONE. 
-   * @return the previous code point. 
-   * @stable ICU 2.0
-   */
-  virtual UChar32       previous32(void) = 0;
+    /**
+     * Advances to the previous code point in the iteration range
+     * (toward startIndex()), and returns that code point.  If there are
+     * no more code points to return, returns DONE. 
+     * @return the previous code point. 
+     * @stable ICU 2.0
+     */
+    virtual UChar32       previous32(void) = 0;
 
-  /**
-   * Returns FALSE if there are no more code units or code points
-   * before the current position in the iteration range.
-   * This is used with previous() or previous32() in backward
-   * iteration.
-   * @return FALSE if there are no more code units or code points
-   * before the current position in the iteration range, return TRUE otherwise.
-   * @stable ICU 2.0
-   */
-  virtual UBool        hasPrevious() = 0;
+    /**
+     * Returns FALSE if there are no more code units or code points
+     * before the current position in the iteration range.
+     * This is used with previous() or previous32() in backward
+     * iteration.
+     * @return FALSE if there are no more code units or code points
+     * before the current position in the iteration range, return TRUE otherwise.
+     * @stable ICU 2.0
+     */
+    virtual UBool        hasPrevious() = 0;
 
-  /**
-   * Returns the numeric index in the underlying text-storage
-   * object of the character returned by first().  Since it's
-   * possible to create an iterator that iterates across only
-   * part of a text-storage object, this number isn't
-   * necessarily 0.  
-   * @returns the numeric index in the underlying text-storage
-   * object of the character returned by first().
-   * @stable ICU 2.0
-   */
-  inline int32_t       startIndex(void) const;
+    /**
+     * Returns the numeric index in the underlying text-storage
+     * object of the character returned by first().  Since it's
+     * possible to create an iterator that iterates across only
+     * part of a text-storage object, this number isn't
+     * necessarily 0.  
+     * @returns the numeric index in the underlying text-storage
+     * object of the character returned by first().
+     * @stable ICU 2.0
+     */
+    inline int32_t       startIndex(void) const;
         
-  /**
-   * Returns the numeric index in the underlying text-storage
-   * object of the position immediately BEYOND the character
-   * returned by last().  
-   * @return the numeric index in the underlying text-storage
-   * object of the position immediately BEYOND the character
-   * returned by last().
-   * @stable ICU 2.0
-   */
-  inline int32_t       endIndex(void) const;
+    /**
+     * Returns the numeric index in the underlying text-storage
+     * object of the position immediately BEYOND the character
+     * returned by last().  
+     * @return the numeric index in the underlying text-storage
+     * object of the position immediately BEYOND the character
+     * returned by last().
+     * @stable ICU 2.0
+     */
+    inline int32_t       endIndex(void) const;
         
-  /**
-   * Returns the numeric index in the underlying text-storage
-   * object of the character the iterator currently refers to
-   * (i.e., the character returned by current()).  
-   * @return the numberic index in the text-storage object of 
-   * the character the iterator currently refers to
-   * @stable ICU 2.0
-   */
-  inline int32_t       getIndex(void) const;
+    /**
+     * Returns the numeric index in the underlying text-storage
+     * object of the character the iterator currently refers to
+     * (i.e., the character returned by current()).  
+     * @return the numeric index in the text-storage object of 
+     * the character the iterator currently refers to
+     * @stable ICU 2.0
+     */
+    inline int32_t       getIndex(void) const;
 
-  /**
-   * Returns the length of the entire text in the underlying
-   * text-storage object.
-   * @return the length of the entire text in the text-storage object
-   * @stable ICU 2.0
-   */
-  inline int32_t           getLength() const;
+    /**
+     * Returns the length of the entire text in the underlying
+     * text-storage object.
+     * @return the length of the entire text in the text-storage object
+     * @stable ICU 2.0
+     */
+    inline int32_t           getLength() const;
 
-  /**
-   * Moves the current position relative to the start or end of the
-   * iteration range, or relative to the current position itself.
-   * The movement is expressed in numbers of code units forward
-   * or backward by specifying a positive or negative delta.
-   * @delta the position relative to origin. A positive delta means forward;
-   * a negative delta means backward.
-   * @origin Origin enumeration {kStart, kCurrent, kEnd}
-   * @return the new position
-   * @stable ICU 2.0
-   */
-  virtual int32_t      move(int32_t delta, EOrigin origin) = 0;
+    /**
+     * Moves the current position relative to the start or end of the
+     * iteration range, or relative to the current position itself.
+     * The movement is expressed in numbers of code units forward
+     * or backward by specifying a positive or negative delta.
+     * @param delta the position relative to origin. A positive delta means forward;
+     * a negative delta means backward.
+     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+     * @return the new position
+     * @stable ICU 2.0
+     */
+    virtual int32_t      move(int32_t delta, EOrigin origin) = 0;
 
-  /**
-   * Moves the current position relative to the start or end of the
-   * iteration range, or relative to the current position itself.
-   * The movement is expressed in numbers of code points forward
-   * or backward by specifying a positive or negative delta.
-   * @delta the position relative to origin. A positive delta means forward;
-   * a negative delta means backward.
-   * @origin Origin enumeration {kStart, kCurrent, kEnd}
-   * @return the new position
-   * @stable ICU 2.0
-   */
-  virtual int32_t      move32(int32_t delta, EOrigin origin) = 0;
+    /**
+     * Moves the current position relative to the start or end of the
+     * iteration range, or relative to the current position itself.
+     * The movement is expressed in numbers of code points forward
+     * or backward by specifying a positive or negative delta.
+     * @param delta the position relative to origin. A positive delta means forward;
+     * a negative delta means backward.
+     * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+     * @return the new position
+     * @stable ICU 2.0
+     */
+    virtual int32_t      move32(int32_t delta, EOrigin origin) = 0;
 
-  /**
-   * Copies the text under iteration into the UnicodeString
-   * referred to by "result".  
-   * @param result Receives a copy of the text under iteration.  
-   * @stable ICU 2.0
-   */
-  virtual void            getText(UnicodeString&  result) = 0;
+    /**
+     * Copies the text under iteration into the UnicodeString
+     * referred to by "result".  
+     * @param result Receives a copy of the text under iteration.  
+     * @stable ICU 2.0
+     */
+    virtual void            getText(UnicodeString&  result) = 0;
 
 protected:
-  /**
-   * Empty constructor.
-   * @stable ICU 2.0
-   */
-  CharacterIterator();
+    /**
+     * Empty constructor.
+     * @stable ICU 2.0
+     */
+    CharacterIterator();
 
-  /**
-   * Constructor, just setting the length field in this base class.
-   * @stable ICU 2.0
-   */
-  CharacterIterator(int32_t length);
+    /**
+     * Constructor, just setting the length field in this base class.
+     * @stable ICU 2.0
+     */
+    CharacterIterator(int32_t length);
 
-  /**
-   * Constructor, just setting the length and position fields in this base class.
-   * @stable ICU 2.0
-   */
-  CharacterIterator(int32_t length, int32_t position);
+    /**
+     * Constructor, just setting the length and position fields in this base class.
+     * @stable ICU 2.0
+     */
+    CharacterIterator(int32_t length, int32_t position);
 
-  /**
-   * Constructor, just setting the length, start, end, and position fields in this base class.
-   * @stable ICU 2.0
-   */
-  CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);
+    /**
+     * Constructor, just setting the length, start, end, and position fields in this base class.
+     * @stable ICU 2.0
+     */
+    CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);
   
-  /**
-   * Copy constructor.
-   *
-   * @param that The CharacterIterator to be copied
-   * @stable ICU 2.0
-   */
-  CharacterIterator(const CharacterIterator &that);
+    /**
+     * Copy constructor.
+     *
+     * @param that The CharacterIterator to be copied
+     * @stable ICU 2.0
+     */
+    CharacterIterator(const CharacterIterator &that);
 
-  /**
-   * Assignment operator.  Sets this CharacterIterator to have the same behavior,
-   * as the one passed in.
-   * @param that The CharacterIterator passed in.
-   * @return the newly set CharacterIterator.
-   * @stable ICU 2.0
-   */
-  CharacterIterator &operator=(const CharacterIterator &that);
+    /**
+     * Assignment operator.  Sets this CharacterIterator to have the same behavior,
+     * as the one passed in.
+     * @param that The CharacterIterator passed in.
+     * @return the newly set CharacterIterator.
+     * @stable ICU 2.0
+     */
+    CharacterIterator &operator=(const CharacterIterator &that);
 
-  /**
-   * Base class text length field.
-   * Necessary this for correct getText() and hashCode().
-   * @stable ICU 2.0
-   */
-  int32_t textLength;
+    /**
+     * Base class text length field.
+     * This is necessary for correct getText() and hashCode().
+     * @stable ICU 2.0
+     */
+    int32_t textLength;
 
-  /**
-   * Base class field for the current position.
-   * @stable ICU 2.0
-   */
-  int32_t  pos;
+    /**
+     * Base class field for the current position.
+     * @stable ICU 2.0
+     */
+    int32_t  pos;
 
-  /**
-   * Base class field for the start of the iteration range.
-   * @stable ICU 2.0
-   */
-  int32_t  begin;
+    /**
+     * Base class field for the start of the iteration range.
+     * @stable ICU 2.0
+     */
+    int32_t  begin;
 
-  /**
-   * Base class field for the end of the iteration range.
-   * @stable ICU 2.0
-   */
-  int32_t  end;
+    /**
+     * Base class field for the end of the iteration range.
+     * @stable ICU 2.0
+     */
+    int32_t  end;
 };
 
 inline UBool
 ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
-  return !operator==(that);
+    return !operator==(that);
 }
 
 inline int32_t
 CharacterIterator::setToStart() {
-  return move(0, kStart);
+    return move(0, kStart);
 }
 
 inline int32_t
 CharacterIterator::setToEnd() {
-  return move(0, kEnd);
+    return move(0, kEnd);
 }
 
 inline int32_t
 CharacterIterator::startIndex(void) const {
-  return begin;
+    return begin;
 }
 
 inline int32_t
 CharacterIterator::endIndex(void) const {
-  return end;
+    return end;
 }
 
 inline int32_t
 CharacterIterator::getIndex(void) const {
-  return pos;
+    return pos;
 }
 
 inline int32_t
 CharacterIterator::getLength(void) const {
-  return textLength;
+    return textLength;
 }
 
 U_NAMESPACE_END

Index: dbbi.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/dbbi.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- dbbi.h	10 Sep 2003 02:42:04 -0000	1.1
+++ dbbi.h	6 Apr 2004 10:08:07 -0000	1.2
@@ -84,11 +84,6 @@
 
     DictionaryBasedBreakIteratorTables  *fTables;
 
-    /**
-     * Class ID
-     */
-    static const char fgClassID;
-
     /**=======================================================================
      * Create a dictionary based break boundary detection iterator.  
      * @param tablesImage The location for the dictionary to be loaded into memory
@@ -176,6 +171,19 @@
     virtual int32_t preceding(int32_t offset);
 
     /**
+     * Returns the class ID for this class.  This is useful only for
+     * comparing to a return value from getDynamicClassID().  For example:
+     *
+     *      Base* polymorphic_pointer = createPolymorphicObject();
+     *      if (polymorphic_pointer->getDynamicClassID() ==
+     *          Derived::getStaticClassID()) ...
+     *
+     * @return          The class ID for all objects of this class.
+     * @stable ICU 2.0
+     */
+    static UClassID getStaticClassID(void);
+
+    /**
      * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
      * This method is to implement a simple version of RTTI, since not all
      * C++ compilers support genuine RTTI.  Polymorphic operator==() and
@@ -188,19 +196,6 @@
      */
     virtual UClassID getDynamicClassID(void) const;
 
-    /**
-     * Returns the class ID for this class.  This is useful only for
-     * comparing to a return value from getDynamicClassID().  For example:
-     *
-     *      Base* polymorphic_pointer = createPolymorphicObject();
-     *      if (polymorphic_pointer->getDynamicClassID() ==
-     *          Derived::getStaticClassID()) ...
-     *
-     * @return          The class ID for all objects of this class.
-     * @stable ICU 2.0
-     */
-    static inline UClassID getStaticClassID(void);
-
 protected:
     //=======================================================================
     // implementation
@@ -269,14 +264,6 @@
     friend class DictionaryBasedBreakIteratorTables;
     friend class BreakIterator;
 };
-
-inline UClassID
-DictionaryBasedBreakIterator::getStaticClassID(void)
-{ return (UClassID)(&fgClassID); }
-
-inline UClassID
-DictionaryBasedBreakIterator::getDynamicClassID(void) const
-{ return DictionaryBasedBreakIterator::getStaticClassID(); }
 
 U_NAMESPACE_END
 

Index: locid.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/locid.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- locid.h	10 Sep 2003 02:42:04 -0000	1.4
+++ locid.h	6 Apr 2004 10:08:07 -0000	1.5
@@ -34,6 +34,7 @@
 #include "unicode/unistr.h"
 #include "unicode/putil.h"
 #include "unicode/uloc.h"
+#include "unicode/strenum.h"
 
 /**
  * \file
@@ -250,6 +251,8 @@
      * @param country  Uppercase two-letter ISO-3166 code. (optional)
      * @param variant  Uppercase vendor and browser specific code. See class
      *                 description. (optional)
+     * @param keywordsAndValues A string consisting of keyword/values pairs, such as
+     *                 "collation=phonebook;currency=euro"
      *
      * @see getDefault
      * @see uloc_getDefault
@@ -257,7 +260,8 @@
      */
     Locale( const   char * language,
             const   char * country  = 0, 
-            const   char * variant  = 0);
+            const   char * variant  = 0,
+            const   char * keywordsAndValues = 0);
 
     /**
      * Initializes a Locale object from another Locale object.
@@ -303,6 +307,19 @@
     UBool   operator!=(const    Locale&     other) const;
 
     /**
+     * Clone this object.
+     * Clones can be used concurrently in multiple threads.
+     * If an error occurs, then NULL is returned.
+     * The caller must delete the clone.
+     *
+     * @return a clone of this object
+     *
+     * @see getDynamicClassID
+     * @draft ICU 2.8
+     */
+    Locale *clone() const;
+
+    /**
      * Common methods of getting the current default Locale. Used for the
      * presentation: menus, dialogs, etc. Generally set once when your applet or
      * application is initialized, then never reset. (If you do reset the
@@ -325,13 +342,14 @@
      * setDefault() only changes ICU's default locale ID, <strong>not</strong>
      * the default locale ID of the runtime environment.
      *
-     * @param newLocale Locale to set to.
+     * @param newLocale Locale to set to.  If NULL, set to the value obtained
+     *                  from the runtime environment.
      * @param success The error code.
      * @system
      * @stable ICU 2.0
      */
     static  void    setDefault(const    Locale&     newLocale,
-                                                    UErrorCode&  success);
+                               UErrorCode&  success);
 
     
     /**
@@ -354,6 +372,15 @@
     inline const char *  getLanguage( ) const;
 
     /**
+     * Returns the locale's ISO-15924 abbreviation script code.
+     * @return      An alias to the code
+     * @see uscript_getShortName
+     * @see uscript_getCode
+     * @draft ICU 2.8
+     */
+    inline const char *  getScript( ) const;
+
+    /**
      * Returns the locale's ISO-3166 country code.
      * @return      An alias to the code
      * @stable ICU 2.0
@@ -378,6 +405,37 @@
     inline const char * getName() const;
 
     /**
+     * Returns the programmatic name of the entire locale as getName would return,
+     * but without keywords.
+     * @return      A pointer to "name".
+     * @see getName
+     * @draft ICU 2.8
+     */
+    const char * getBaseName() const;
+
+
+    /**
+     * Gets the list of keywords for the specified locale. 
+     *
+     * @return pointer to StringEnumeration class. Client must dispose of it by calling delete.
+     * @param status Returns any error information while performing this operation.
+     * @draft ICU 2.8
+     */
+    StringEnumeration * createKeywords(UErrorCode &status) const;
+
+    /**
+     * Get the value for a keyword. 
+     * 
+     * @param keywordName name of the keyword for which we want the value. Case insensitive.
+     * @param buffer The buffer to receive the keyword value.
+     * @param bufLen The capacity of the receiving buffer.
+     * @param status Returns any error information while performing this operation.
+     * @return the length of the keyword value.
+     * @draft ICU 2.8
+     */
+     int32_t getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const;
+
+    /**
      * returns the locale's three-letter language code, as specified
      * in ISO draft standard ISO-639-2..
      * @return      An alias to the code, or NULL
@@ -414,21 +472,49 @@
 
     /**
      * Fills in "dispLang" with the name of this locale's language in a format suitable for
-     * user display in the locale specified by "inLocale".  For example, if the locale's
-     * language code is "en" and inLocale's language code is "fr", this function would set
+     * user display in the locale specified by "displayLocale".  For example, if the locale's
+     * language code is "en" and displayLocale's language code is "fr", this function would set
      * dispLang to "Anglais".
-     * @param inLocale  Specifies the locale to be used to display the name.  In other words,
+     * @param displayLocale  Specifies the locale to be used to display the name.  In other words,
      *                  if the locale's language code is "en", passing Locale::getFrench() for
-     *                  inLocale would result in "Anglais", while passing Locale::getGerman()
-     *                  for inLocale would result in "Englisch".
+     *                  displayLocale would result in "Anglais", while passing Locale::getGerman()
+     *                  for displayLocale would result in "Englisch".
      * @param dispLang  Receives the language's display name.
      * @return          A reference to "dispLang".
      * @stable ICU 2.0
      */
-    UnicodeString&  getDisplayLanguage( const   Locale&         inLocale,
+    UnicodeString&  getDisplayLanguage( const   Locale&         displayLocale,
                                                 UnicodeString&  dispLang) const;
 
     /**
+     * Fills in "dispScript" with the name of this locale's script in a format suitable
+     * for user display in the default locale.  For example, if the locale's script code
+     * is "LATN" and the default locale's language code is "en", this function would set
+     * dispScript to "Latin".
+     * @param dispScript    Receives the script's display name.
+     * @return              A reference to "dispScript".
+     * @draft ICU 2.8
+     */
+    UnicodeString&  getDisplayScript(          UnicodeString& dispScript) const;
+
+    /**
+     * Fills in "dispScript" with the name of this locale's script in a format suitable
+     * for user display in the locale specified by "displayLocale".  For example, if the locale's
+     * script code is "LATN" and displayLocale's language code is "en", this function would set
+     * dispScript to "Latin".
+     * @param displayLocale      Specifies the locale to be used to display the name.  In other
+     *                      words, if the locale's script code is "LATN", passing
+     *                      Locale::getFrench() for displayLocale would result in "", while
+     *                      passing Locale::getGerman() for displayLocale would result in
+     *                      "".
+     * @param dispScript    Receives the script's display name.
+     * @return              A reference to "dispScript".
+     * @draft ICU 2.8
+     */
+    UnicodeString&  getDisplayScript(  const   Locale&         displayLocale,
+                                               UnicodeString&  dispScript) const;
+
+    /**
      * Fills in "dispCountry" with the name of this locale's country in a format suitable
      * for user display in the default locale.  For example, if the locale's country code
      * is "FR" and the default locale's language code is "en", this function would set
@@ -441,19 +527,19 @@
 
     /**
      * Fills in "dispCountry" with the name of this locale's country in a format suitable
-     * for user display in the locale specified by "inLocale".  For example, if the locale's
-     * country code is "US" and inLocale's language code is "fr", this function would set
+     * for user display in the locale specified by "displayLocale".  For example, if the locale's
+     * country code is "US" and displayLocale's language code is "fr", this function would set
      * dispCountry to "Etats-Unis".
-     * @param inLocale      Specifies the locale to be used to display the name.  In other
+     * @param displayLocale      Specifies the locale to be used to display the name.  In other
      *                      words, if the locale's country code is "US", passing
-     *                      Locale::getFrench() for inLocale would result in "États-Unis", while
-     *                      passing Locale::getGerman() for inLocale would result in
+     *                      Locale::getFrench() for displayLocale would result in "États-Unis", while
+     *                      passing Locale::getGerman() for displayLocale would result in
      *                      "Vereinigte Staaten".
      * @param dispCountry   Receives the country's display name.
      * @return              A reference to "dispCountry".
      * @stable ICU 2.0
      */
-    UnicodeString&  getDisplayCountry(  const   Locale&         inLocale,
+    UnicodeString&  getDisplayCountry(  const   Locale&         displayLocale,
                                                 UnicodeString&  dispCountry) const;
 
     /**
@@ -467,13 +553,13 @@
 
     /**
      * Fills in "dispVar" with the name of this locale's variant code in a format
-     * suitable for user display in the locale specified by "inLocale".
-     * @param inLocale  Specifies the locale to be used to display the name.
+     * suitable for user display in the locale specified by "displayLocale".
+     * @param displayLocale  Specifies the locale to be used to display the name.
      * @param dispVar   Receives the variant's display name.
      * @return          A reference to "dispVar".
      * @stable ICU 2.0
      */
-    UnicodeString&  getDisplayVariant(  const   Locale&         inLocale,
+    UnicodeString&  getDisplayVariant(  const   Locale&         displayLocale,
                                                 UnicodeString&  dispVar) const;
 
     /**
@@ -491,17 +577,17 @@
 
     /**
      * Fills in "name" with the name of this locale in a format suitable for user display 
-     * in the locale specfied by "inLocale".  This function uses getDisplayLanguage(),
+     * in the locale specified by "displayLocale".  This function uses getDisplayLanguage(),
      * getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display
-     * name in the format "language (country[,variant])".  For example, if inLocale is
+     * name in the format "language (country[,variant])".  For example, if displayLocale is
      * fr_FR, then en_US's display name would be "Anglais (États-Unis)", and no_NO_NY's
      * display name would be "norvégien (Norvège,NY)".
-     * @param inLocale  Specifies the locale to be used to display the name.
+     * @param displayLocale  Specifies the locale to be used to display the name.
      * @param name      Receives the locale's display name.
      * @return          A reference to "name".
      * @stable ICU 2.0
      */
-    UnicodeString&  getDisplayName( const   Locale&         inLocale,
+    UnicodeString&  getDisplayName( const   Locale&         displayLocale,
                                             UnicodeString&  name) const;
 
     /**
@@ -558,18 +644,18 @@
     static const char* const*  getISOLanguages();
 
     /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     * ICU "poor man's RTTI", returns a UClassID for this class.
      *
-     * @draft ICU 2.2
+     * @stable ICU 2.2
      */
-    virtual inline UClassID getDynamicClassID() const;
+    static UClassID getStaticClassID();
 
     /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
      *
-     * @draft ICU 2.2
+     * @stable ICU 2.2
      */
-    static inline UClassID getStaticClassID();
+    virtual UClassID getDynamicClassID() const;
 
 protected: /* only protected for testing purposes. DO NOT USE. */
     /**
@@ -603,32 +689,26 @@
     static Locale *getLocaleCache(void);
 
     char language[ULOC_LANG_CAPACITY];
+    char script[ULOC_SCRIPT_CAPACITY];
     char country[ULOC_COUNTRY_CAPACITY];
     int32_t variantBegin;
     char* fullName;
     char fullNameBuffer[ULOC_FULLNAME_CAPACITY];
+    // name without keywords
+    char* baseName;
+    char baseNameBuffer[ULOC_FULLNAME_CAPACITY];
 
     UBool fIsBogus;
 
-    /**
-     * The address of this static class variable serves as this class's ID
-     * for ICU "poor man's RTTI".
-     */
-    static const char fgClassID;
-    
     static const Locale &getLocale(int locid);
 
+    /**
+     * A friend to allow the default locale to be set by either the C or C++ API.
+     * @internal
+     */
     friend void locale_set_default_internal(const char *);
 };
 
-inline UClassID
-Locale::getStaticClassID()
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-Locale::getDynamicClassID() const
-{ return Locale::getStaticClassID(); }
-
 inline UBool
 Locale::operator!=(const    Locale&     other) const
 {
@@ -648,6 +728,12 @@
 }
 
 inline const char *
+Locale::getScript() const
+{
+    return script;
+}
+
+inline const char *
 Locale::getVariant() const
 {
     return &fullName[variantBegin];
@@ -667,3 +753,4 @@
 U_NAMESPACE_END
 
 #endif
+

Index: normlzr.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/normlzr.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- normlzr.h	10 Sep 2003 02:42:04 -0000	1.5
+++ normlzr.h	6 Apr 2004 10:08:07 -0000	1.6
@@ -300,7 +300,7 @@
    *         "mode" normalization form.
    *
    * @see quickCheck
-   * @draft ICU 2.2
+   * @stable ICU 2.2
    */
   static inline UBool
   isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
@@ -420,7 +420,7 @@
    * @see u_strCompare
    * @see u_strCaseCompare
    *
-   * @draft ICU 2.2
+   * @stable ICU 2.2
    */
   static inline int32_t
   compare(const UnicodeString &s1, const UnicodeString &s2,
@@ -692,18 +692,18 @@
   void            getText(UnicodeString&  result);
 
   /**
-   * ICU "poor man's RTTI", returns a UClassID for the actual class.
-   * @return a UClassID for the actual class.
-   * @draft ICU 2.2
+   * ICU "poor man's RTTI", returns a UClassID for this class.
+   * @returns a UClassID for this class.
+   * @stable ICU 2.2
    */
-  virtual inline UClassID getDynamicClassID() const;
+  static UClassID getStaticClassID();
 
   /**
-   * ICU "poor man's RTTI", returns a UClassID for this class.
-   * @returns a UClassID for this class.
-   * @draft ICU 2.2
+   * ICU "poor man's RTTI", returns a UClassID for the actual class.
+   * @return a UClassID for the actual class.
+   * @stable ICU 2.2
    */
-  static inline UClassID getStaticClassID();
+  virtual UClassID getDynamicClassID() const;
 
 private:
   //-------------------------------------------------------------------------
@@ -739,24 +739,11 @@
   UnicodeString       buffer;
   int32_t         bufferPos;
 
-  /**
-   * The address of this static class variable serves as this class's ID
-   * for ICU "poor man's RTTI".
-   */
-  static const char fgClassID;
 };
 
 //-------------------------------------------------------------------------
 // Inline implementations
 //-------------------------------------------------------------------------
-
-inline UClassID
-Normalizer::getStaticClassID()
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-Normalizer::getDynamicClassID() const
-{ return Normalizer::getStaticClassID(); }
 
 inline UBool
 Normalizer::operator!= (const Normalizer& other) const

Index: parsepos.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/parsepos.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- parsepos.h	10 Sep 2003 02:42:04 -0000	1.1
+++ parsepos.h	6 Apr 2004 10:08:07 -0000	1.2
@@ -46,8 +46,10 @@
      * @stable ICU 2.0
      */
     ParsePosition()
-        : UObject()
-      { this->index = 0; this->errorIndex = -1; }
+        : UObject(),
+        index(0),
+        errorIndex(-1)
+      {}
 
     /**
      * Create a new ParsePosition with the given initial index.
@@ -55,8 +57,10 @@
      * @stable ICU 2.0
      */
     ParsePosition(int32_t newIndex)
-        : UObject()
-      {    this->index = newIndex; this->errorIndex = -1; }
+        : UObject(),
+        index(newIndex),
+        errorIndex(-1)
+      {}
 
     /**
      * Copy constructor
@@ -64,14 +68,16 @@
      * @stable ICU 2.0
      */
     ParsePosition(const ParsePosition& copy)
-        : UObject(copy)
-      {    this->index = copy.index; this->errorIndex = copy.errorIndex; }
+        : UObject(copy),
+        index(copy.index),
+        errorIndex(copy.errorIndex)
+      {}
 
     /**
      * Destructor
      * @stable ICU 2.0
      */
-    ~ParsePosition() {}
+    virtual ~ParsePosition();
 
     /**
      * Assignment operator
@@ -94,6 +100,19 @@
     UBool              operator!=(const ParsePosition& that) const;
 
     /**
+     * Clone this object.
+     * Clones can be used concurrently in multiple threads.
+     * If an error occurs, then NULL is returned.
+     * The caller must delete the clone.
+     *
+     * @return a clone of this object
+     *
+     * @see getDynamicClassID
+     * @draft ICU 2.8
+     */
+    ParsePosition *clone() const;
+
+    /**
      * Retrieve the current parse position.  On input to a parse method, this
      * is the index of the character at which parsing will begin; on output, it
      * is the index of the character following the last character parsed.
@@ -126,18 +145,18 @@
     int32_t getErrorIndex(void) const;
 
     /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     * ICU "poor man's RTTI", returns a UClassID for this class.
      *
-     * @draft ICU 2.2
+     * @stable ICU 2.2
      */
-    virtual inline UClassID getDynamicClassID() const;
+    static UClassID getStaticClassID();
 
     /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
      *
-     * @draft ICU 2.2
+     * @stable ICU 2.2
      */
-    static inline UClassID getStaticClassID();
+    virtual UClassID getDynamicClassID() const;
 
 private:
     /**
@@ -153,20 +172,7 @@
      */
     int32_t errorIndex;
 
-    /**
-     * The address of this static class variable serves as this class's ID
-     * for ICU "poor man's RTTI".
-     */
-    static const char fgClassID;
 };
-
-inline UClassID
-ParsePosition::getStaticClassID()
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-ParsePosition::getDynamicClassID() const
-{ return ParsePosition::getStaticClassID(); }
 
 inline ParsePosition&
 ParsePosition::operator=(const ParsePosition& copy)

Index: platform.h.in
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/platform.h.in,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- platform.h.in	10 Sep 2003 02:42:04 -0000	1.5
+++ platform.h.in	6 Apr 2004 10:08:07 -0000	1.6
@@ -44,6 +44,14 @@
 #define U_IOSTREAM_SOURCE @U_IOSTREAM_SOURCE@
 #endif
 
+#ifndef U_DEBUG
+#define U_DEBUG @ENABLE_DEBUG@
+#endif
+
+#ifndef U_RELEASE
+#define U_RELEASE @ENABLE_RELEASE@
+#endif
+
 /* Determines whether specific types are available */
 #ifndef U_HAVE_INT8_T
 #define U_HAVE_INT8_T @HAVE_INT8_T@
@@ -104,6 +112,11 @@
 #define U_HAVE_PLACEMENT_NEW @U_HAVE_PLACEMENT_NEW@
 #endif
 
+/* Determine whether to enable tracing. */
+#ifndef U_ENABLE_TRACING
+#define U_ENABLE_TRACING @U_ENABLE_TRACING@
+#endif
+
 /* Define the library suffix in a C syntax. */
 #define U_HAVE_LIB_SUFFIX @U_HAVE_LIB_SUFFIX@
 #define U_LIB_SUFFIX_C_NAME @ICULIBSUFFIXCNAME@
@@ -264,4 +277,3 @@
 /*===========================================================================*/
 
 #define U_MAKE  "@U_MAKE@"
-

Index: pos400.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/pos400.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- pos400.h	10 Sep 2003 02:42:04 -0000	1.3
+++ pos400.h	6 Apr 2004 10:08:07 -0000	1.4
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1997-2001, International Business Machines
+*   Copyright (C) 1997-2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@@ -66,6 +66,11 @@
    setting in umachine.h which is for all platforms. */
 #ifndef U_OVERRIDE_CXX_ALLOCATION
 #define U_OVERRIDE_CXX_ALLOCATION 1
+#endif
+
+/* Determine whether to enable tracing. */
+#ifndef U_ENABLE_TRACING
+#define U_ENABLE_TRACING 1
 #endif
 
 /*===========================================================================*/

Index: putil.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/putil.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- putil.h	10 Sep 2003 02:42:04 -0000	1.4
+++ putil.h	6 Apr 2004 10:08:07 -0000	1.5
@@ -238,7 +238,6 @@
 /**
  * Get UTC (GMT) time measured in seconds since 0:00 on 1/1/70.
  * @return the UTC time measured in seconds 
- * @stable ICU 2.0
  * @internal
  */
 U_CAPI int32_t  U_EXPORT2 uprv_getUTCtime(void);
@@ -314,18 +313,24 @@
  */
 #ifdef XP_MAC
 #   define U_FILE_SEP_CHAR ':'
+#   define U_FILE_ALT_SEP_CHAR ':'
 #   define U_PATH_SEP_CHAR ';'
 #   define U_FILE_SEP_STRING ":"
+#   define U_FILE_ALT_SEP_STRING ":"
 #   define U_PATH_SEP_STRING ";"
 #elif defined(WIN32) || defined(OS2)
 #   define U_FILE_SEP_CHAR '\\'
+#   define U_FILE_ALT_SEP_CHAR '/'
 #   define U_PATH_SEP_CHAR ';'
 #   define U_FILE_SEP_STRING "\\"
+#   define U_FILE_ALT_SEP_STRING "/"
 #   define U_PATH_SEP_STRING ";"
 #else
 #   define U_FILE_SEP_CHAR '/'
+#   define U_FILE_ALT_SEP_CHAR '/'
 #   define U_PATH_SEP_CHAR ':'
 #   define U_FILE_SEP_STRING "/"
+#   define U_FILE_ALT_SEP_STRING "/"
 #   define U_PATH_SEP_STRING ":"
 #endif
 
@@ -369,9 +374,35 @@
 u_UCharsToChars(const UChar *us, char *cs, int32_t length);
 
 /**
+ * Check if a char string only contains invariant characters.
+ * See utypes.h for details.
+ *
+ * @param s Input string pointer.
+ * @param length Length of the string, can be -1 if NUL-terminated.
+ * @return TRUE if s contains only invariant characters.
+ *
+ * @internal (ICU 2.8)
+ */
+U_CAPI UBool U_EXPORT2
+uprv_isInvariantString(const char *s, int32_t length);
+
+/**
+ * Check if a Unicode string only contains invariant characters.
+ * See utypes.h for details.
+ *
+ * @param s Input string pointer.
+ * @param length Length of the string, can be -1 if NUL-terminated.
+ * @return TRUE if s contains only invariant characters.
+ *
+ * @internal (ICU 2.8)
+ */
+U_CAPI UBool U_EXPORT2
+uprv_isInvariantUString(const UChar *s, int32_t length);
+
+/**
  * \def U_UPPER_ORDINAL
  * Get the ordinal number of an uppercase invariant character
- * @stable ICU 2.4
+ * @internal
  */
 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
 #   define U_UPPER_ORDINAL(x) ((x)-'A')
@@ -415,7 +446,7 @@
  */
 #    define U_MAX_PTR(base) ((void *)(((char *)base)-((int32_t)(base))+((int32_t)0xffefff)))
 #  else
-#    define U_MAX_PTR(base) ((void *)(((char *)(base)+0x7fffffff) > (char *)(base) ? ((char *)(base)+0x7fffffff) : (char *)-1))
+#    define U_MAX_PTR(base) ((void *)(((char *)(base)+0x7fffffffu) > (char *)(base) ? ((char *)(base)+0x7fffffffu) : (char *)-1))
 #  endif
 #endif
 

Index: pwin32.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/pwin32.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- pwin32.h	10 Sep 2003 02:42:04 -0000	1.5
+++ pwin32.h	6 Apr 2004 10:08:07 -0000	1.6
@@ -42,6 +42,22 @@
 #define U_IOSTREAM_SOURCE 199711
 #endif
 
+#ifndef U_DEBUG
+#ifdef _DEBUG
+#define U_DEBUG 1
+#else
+#define U_DEBUG 0
+#endif
+#endif
+
+#ifndef U_RELEASE
+#ifdef NDEBUG
+#define U_RELEASE 1
+#else
+#define U_RELEASE 0
+#endif
+#endif
+
 /* Determines whether specific types are available */
 #define U_HAVE_INT8_T 0
 #define U_HAVE_UINT8_T 0
@@ -52,6 +68,10 @@
 #define U_HAVE_INT64_T 0
 #define U_HAVE_UINT64_T 0
 
+/* Define 64 bit limits */
+#define INT64_C(x) x
+#define UINT64_C(x) x
+
 /* Define whether namespace is supported */
 #define U_HAVE_NAMESPACE 1
 
@@ -65,6 +85,11 @@
 /* Determine whether to override placement new and delete for STL. */
 #ifndef U_HAVE_PLACEMENT_NEW
 #define U_HAVE_PLACEMENT_NEW 1
+#endif
+
+/* Determine whether to enable tracing. */
+#ifndef U_ENABLE_TRACING
+#define U_ENABLE_TRACING 1
 #endif
 
 /*===========================================================================*/

Index: rbbi.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/rbbi.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- rbbi.h	10 Sep 2003 02:42:04 -0000	1.1
+++ rbbi.h	6 Apr 2004 10:08:07 -0000	1.2
@@ -30,6 +30,7 @@
 class  RuleBasedBreakIteratorTables;
 class  BreakIterator;
 class  RBBIDataWrapper;
+struct RBBIStateTable;
 
 
 
@@ -61,8 +62,6 @@
      * @internal
      */
     RBBIDataWrapper    *fData;
-    /** @internal */
-    UTrie              *fCharMappings;
 
     /** Rule {tag} value for the most recent match. 
      *  @internal
@@ -93,27 +92,12 @@
     static UBool        fTrace;
 
 
-
-private:
-    /**
-     * Class ID
-     */
-    static const char fgClassID;
-
 protected:
     //=======================================================================
     // constructors
     //=======================================================================
 
     /**
-     * This constructor uses the udata interface to create a BreakIterator
-     * whose internal tables live in a memory-mapped file.  "image" is a pointer
-     * to the beginning of that file.
-     * @internal
-     */
-    RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
-
-    /**
      * Constructor from a flattened set of RBBI data in malloced memory.
      *             RulesBasedBreakIterators built from a custom set of rules
      *             are created via this constructor; the rules are compiled
@@ -134,7 +118,7 @@
 
     /** Default constructor.  Creates an empty shell of an iterator, with no
      *  rules or text to iterate over.   Object can subsequently be assigned to.
-     *  @draft ICU 2.2
+     *  @stable ICU 2.2
      */
     RuleBasedBreakIterator();
 
@@ -152,11 +136,27 @@
      * @param parseError  In the event of a syntax error in the rules, provides the location
      *                    within the rules of the problem.
      * @param status Information on any errors encountered.
-     *  @draft ICU 2.2
+     * @stable ICU 2.2
      */
     RuleBasedBreakIterator( const UnicodeString    &rules,
                              UParseError           &parseError,
                              UErrorCode            &status);
+
+
+    /**
+     * This constructor uses the udata interface to create a BreakIterator
+     * whose internal tables live in a memory-mapped file.  "image" is an 
+     * ICU UDataMemory handle for the pre-compiled break iterator tables.
+     * @param image handle to the memory image for the break iterator data.
+     *        Ownership of the UDataMemory handle passes to the Break Iterator,
+     *        which will be responsible for closing it when it is no longer needed.
+     * @param status Information on any errors encountered.
+     * @see udata_open
+     * @see #getBinaryRules
+     * @draft ICU 2.8
+     */
+    RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
+
     /**
      * Destructor
      *  @stable ICU 2.0
@@ -345,7 +345,7 @@
      * returned break position.
      *
      * @see UWordBreak
-     * @draft ICU 2.2
+     * @stable ICU 2.2
      */
     virtual int32_t getRuleStatus() const;
 
@@ -360,7 +360,7 @@
      *                  other classes have different class IDs.
      * @stable ICU 2.0
      */
-    inline virtual UClassID getDynamicClassID(void) const;
+    virtual UClassID getDynamicClassID(void) const;
 
     /**
      * Returns the class ID for this class.  This is useful only for
@@ -373,7 +373,7 @@
      * @return          The class ID for all objects of this class.
      * @stable ICU 2.0
      */
-    inline static UClassID getStaticClassID(void);
+    static UClassID getStaticClassID(void);
 
     /*
      * Create a clone (copy) of this break iterator in memory provided
@@ -411,7 +411,7 @@
      * is much faster than building one from the source form of the
      * break rules.
      *
-     * The binary data is can only be used with the same version of ICU
+     * The binary data can only be used with the same version of ICU
      *  and on the same platform type (processor endian-ness)
      *
      * @param length Returns the length of the binary data.  (Out paramter.)
@@ -473,24 +473,39 @@
       */
     void init();
 
+private:
+
+    /**
+     * This method backs the iterator back up to a "safe position" in the text.
+     * This is a position that we know, without any context, must be a break position.
+     * The various calling methods then iterate forward from this safe position to
+     * the appropriate position to return.  (For more information, see the description
+     * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
+     * @param statetable state table used for moving backwards
+     * @internal
+     */
+    int32_t handlePrevious(const RBBIStateTable *statetable);
+
+    /**
+     * This method is the actual implementation of the next() method.  All iteration
+     * vectors through here.  This method initializes the state machine to state 1
+     * and advances through the text character by character until we reach the end
+     * of the text or the state machine transitions to state 0.  We update our return
+     * value every time the state machine passes through a possible end state.
+     * @param statetable state table used for moving forwards
+     * @internal
+     */
+    int32_t handleNext(const RBBIStateTable *statetable);
 };
 
-//----------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 //
 //   Inline Functions Definitions ...
 //
-//----------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
 
 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
     return !operator==(that);
-}
-
-inline UClassID RuleBasedBreakIterator::getStaticClassID(void) {
-    return (UClassID)(&fgClassID);
-}
-
-inline UClassID RuleBasedBreakIterator::getDynamicClassID(void) const {
-    return RuleBasedBreakIterator::getStaticClassID();
 }
 
 U_NAMESPACE_END

Index: rep.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/rep.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- rep.h	10 Sep 2003 02:42:04 -0000	1.4
+++ rep.h	6 Apr 2004 10:08:07 -0000	1.5
@@ -14,7 +14,6 @@
 #ifndef REP_H
 #define REP_H
 
-#include "unicode/utypes.h"
 #include "unicode/uobject.h"
 
 U_NAMESPACE_BEGIN
@@ -234,8 +233,6 @@
 };
 
 inline Replaceable::Replaceable() {}
-
-inline Replaceable::~Replaceable() {}
 
 inline int32_t
 Replaceable::length() const {

Index: resbund.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/resbund.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- resbund.h	10 Sep 2003 02:42:04 -0000	1.4
+++ resbund.h	6 Apr 2004 10:08:07 -0000	1.5
@@ -169,8 +169,24 @@
     ~ResourceBundle();
 
     /**
+     * Clone this object.
+     * Clones can be used concurrently in multiple threads.
+     * If an error occurs, then NULL is returned.
+     * The caller must delete the clone.
+     *
+     * @return a clone of this object
+     *
+     * @see getDynamicClassID
+     * @draft ICU 2.8
+     */
+    ResourceBundle *clone() const;
+
+    /**
      * Returns the size of a resource. Size for scalar types is always 1, and for vector/table types is
      * the number of child resources.
+     * @warning Integer array is treated as a scalar type. There are no 
+     *          APIs to access individual members of an integer array. It
+     *          is always returned as a whole.
      *
      * @return number of resources in a given resource.
      * @stable ICU 2.0
@@ -387,7 +403,7 @@
      * @return  A version number string as specified in the resource bundle or its parent.
      *          The caller does not own this string.
      * @see getVersion
-     * @stable ICU 2.0
+     * @deprecated ICU 2.8 Use getVersion instead.
      */
     const char*   
       getVersionNumber(void) const;
@@ -406,24 +422,37 @@
      * Return the Locale associated with this ResourceBundle. 
      *
      * @return a Locale object
-     * @stable ICU 2.0
+     * @deprecated ICU 2.8 Use getLocale(ULocDataLocaleType type, UErrorCode &status) overload instead.
      */
     const Locale&
       getLocale(void) const;
 
     /**
+     * Return the Locale associated with this ResourceBundle. 
+     * @param type You can choose between requested, valid and actual
+     *             locale. For description see the definition of
+     *             ULocDataLocaleType in uloc.h
+     * @param status just for catching illegal arguments
+     *
+     * @return a Locale object
+     * @draft ICU 2.8
+     */
+    const Locale 
+      getLocale(ULocDataLocaleType type, UErrorCode &status) const;
+
+    /**
      * ICU "poor man's RTTI", returns a UClassID for the actual class.
      *
-     * @draft ICU 2.2
+     * @stable ICU 2.2
      */
-    virtual inline UClassID getDynamicClassID() const;
+    virtual UClassID getDynamicClassID() const;
 
     /**
      * ICU "poor man's RTTI", returns a UClassID for this class.
      *
-     * @draft ICU 2.2
+     * @stable ICU 2.2
      */
-    static inline UClassID getStaticClassID();
+    static UClassID getStaticClassID();
 
 private:
     ResourceBundle(); // default constructor not implemented
@@ -432,20 +461,7 @@
     void constructForLocale(const UnicodeString& path, const Locale& locale, UErrorCode& error);
     Locale *locName;
 
-    /**
-     * The address of this static class variable serves as this class's ID
-     * for ICU "poor man's RTTI".
-     */
-    static const char fgClassID;
 };
-
-inline UClassID 
-ResourceBundle::getStaticClassID() 
-{ return (UClassID)&fgClassID; }
-
-inline UClassID 
-ResourceBundle::getDynamicClassID() const 
-{ return ResourceBundle::getStaticClassID(); }
 
 U_NAMESPACE_END
 #endif

Index: schriter.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/schriter.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- schriter.h	10 Sep 2003 02:42:04 -0000	1.4
+++ schriter.h	6 Apr 2004 10:08:07 -0000	1.5
@@ -153,7 +153,7 @@
    * @return a class ID for this class
    * @stable ICU 2.0
    */
-  static inline UClassID   getStaticClassID(void);
+  static UClassID   getStaticClassID(void);
 
 protected:
   /**
@@ -176,17 +176,7 @@
    */
   UnicodeString            text;
 
-private:
-  static const char        fgClassID;
 };
-
-inline UClassID
-StringCharacterIterator::getStaticClassID(void) 
-{ return (UClassID)(&fgClassID); }
-
-inline UClassID
-StringCharacterIterator::getDynamicClassID(void) const 
-{ return StringCharacterIterator::getStaticClassID(); }
 
 U_NAMESPACE_END
 #endif

Index: strenum.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/strenum.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- strenum.h	10 Sep 2003 02:42:04 -0000	1.1
+++ strenum.h	6 Apr 2004 10:08:07 -0000	1.2
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2002, International Business Machines
+*   Copyright (C) 2002-2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@@ -11,11 +11,10 @@
 #define STRENUM_H
 
 #include "unicode/uobject.h"
+#include "unicode/unistr.h"
 
 U_NAMESPACE_BEGIN
 
-class UnicodeString;
-
 /**
  * Base class for 'pure' C++ implementations of uenum api.  Adds a
  * method that returns the next UnicodeString since in C++ this can
@@ -43,120 +42,206 @@
  * upon any subsequent call to the enumeration's destructor, next,
  * unext, snext, or reset.</p>
  *
+ * ICU 2.8 adds some default implementations and helper functions
+ * for subclasses.
+ *
  * @draft ICU 2.4 
  */
 class U_COMMON_API StringEnumeration : public UObject { 
- public:
-  /**
-   * Destructor.
-   * @draft ICU 2.4
-   */
-  virtual ~StringEnumeration();
+public:
+    /**
+     * Destructor.
+     * @draft ICU 2.4
+     */
+    virtual ~StringEnumeration();
 
-  /**
-   * <p>Return the number of elements that the iterator traverses.  If
-   * the iterator is out of sync with its service, status is set to
-   * U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p>
-   *
-   * <p>The return value will not change except possibly as a result of
-   * a subsequent call to reset, or if the iterator becomes out of sync.</p>
-   *
-   * <p>This is a convenience function. It can end up being very
-   * expensive as all the items might have to be pre-fetched
-   * (depending on the storage format of the data being
-   * traversed).</p>
-   *
-   * @param status the error code.
-   * @return number of elements in the iterator.
-   *
-   * @draft ICU 2.4 */
-  virtual int32_t count(UErrorCode& status) const = 0;
+    /**
+     * Clone this object, an instance of a subclass of StringEnumeration.
+     * Clones can be used concurrently in multiple threads.
+     * If a subclass does not implement clone(), or if an error occurs,
+     * then NULL is returned.
+     * The clone functions in all subclasses return a base class pointer
+     * because some compilers do not support covariant (same-as-this)
+     * return types; cast to the appropriate subclass if necessary.
+     * The caller must delete the clone.
+     *
+     * @return a clone of this object
+     *
+     * @see getDynamicClassID
+     * @draft ICU 2.8
+     */
+    virtual StringEnumeration *clone() const;
 
-  /**
-   * <p>Returns the next element as a NUL-terminated char*.  If there
-   * are no more elements, returns NULL.  If the resultLength pointer
-   * is not NULL, the length of the string (not counting the
-   * terminating NUL) is returned at that address.  If an error
-   * status is returned, the value at resultLength is undefined.</p>
-   *
-   * <p>The returned pointer is owned by this iterator and must not be
-   * deleted by the caller.  The pointer is valid until the next call
-   * to next, unext, snext, reset, or the enumerator's destructor.</p>
-   *
-   * <p>If the iterator is out of sync with its service, status is set
-   * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
-   *
-   * <p>If the native service string is a UChar* string, it is
-   * converted to char* with the invariant converter.  If the
-   * conversion fails (because a character cannot be converted) then
-   * status is set to U_INVARIANT_CONVERSION_ERROR and the return
-   * value is undefined (though not NULL).</p>
-   *
-   * @param status the error code.
-   * @param resultLength a pointer to receive the length, can be NULL.
-   * @return a pointer to the string, or NULL.
-   *
-   * @draft ICU 2.4 
-   */
-  virtual const char* next(int32_t *resultLength, UErrorCode& status) = 0;
+    /**
+     * <p>Return the number of elements that the iterator traverses.  If
+     * the iterator is out of sync with its service, status is set to
+     * U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p>
+     *
+     * <p>The return value will not change except possibly as a result of
+     * a subsequent call to reset, or if the iterator becomes out of sync.</p>
+     *
+     * <p>This is a convenience function. It can end up being very
+     * expensive as all the items might have to be pre-fetched
+     * (depending on the storage format of the data being
+     * traversed).</p>
+     *
+     * @param status the error code.
+     * @return number of elements in the iterator.
+     *
+     * @draft ICU 2.4 */
+    virtual int32_t count(UErrorCode& status) const = 0;
 
-  /**
-   * <p>Returns the next element as a NUL-terminated UChar*.  If there
-   * are no more elements, returns NULL.  If the resultLength pointer
-   * is not NULL, the length of the string (not counting the
-   * terminating NUL) is returned at that address.  If an error
-   * status is returned, the value at resultLength is undefined.</p>
-   *
-   * <p>The returned pointer is owned by this iterator and must not be
-   * deleted by the caller.  The pointer is valid until the next call
-   * to next, unext, snext, reset, or the enumerator's destructor.</p>
-   *
-   * <p>If the iterator is out of sync with its service, status is set
-   * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
-   *
-   * @param status the error code.
-   * @param resultLength a ponter to receive the length, can be NULL.
-   * @return a pointer to the string, or NULL.
-   *
-   * @draft ICU 2.4 
-   */
-  virtual const UChar* unext(int32_t *resultLength, UErrorCode& status) = 0;
+    /**
+     * <p>Returns the next element as a NUL-terminated char*.  If there
+     * are no more elements, returns NULL.  If the resultLength pointer
+     * is not NULL, the length of the string (not counting the
+     * terminating NUL) is returned at that address.  If an error
+     * status is returned, the value at resultLength is undefined.</p>
+     *
+     * <p>The returned pointer is owned by this iterator and must not be
+     * deleted by the caller.  The pointer is valid until the next call
+     * to next, unext, snext, reset, or the enumerator's destructor.</p>
+     *
+     * <p>If the iterator is out of sync with its service, status is set
+     * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+     *
+     * <p>If the native service string is a UChar* string, it is
+     * converted to char* with the invariant converter.  If the
+     * conversion fails (because a character cannot be converted) then
+     * status is set to U_INVARIANT_CONVERSION_ERROR and the return
+     * value is undefined (though not NULL).</p>
+     *
+     * Starting with ICU 2.8, the default implementation calls snext()
+     * and handles the conversion.
+     *
+     * @param status the error code.
+     * @param resultLength a pointer to receive the length, can be NULL.
+     * @return a pointer to the string, or NULL.
+     *
+     * @draft ICU 2.4 
+     */
+    virtual const char* next(int32_t *resultLength, UErrorCode& status);
 
-  /**
-   * <p>Returns the next element a UnicodeString*.  If there are no
-   * more elements, returns NULL.</p>
-   *
-   * <p>The returned pointer is owned by this iterator and must not be
-   * deleted by the caller.  The pointer is valid until the next call
-   * to next, unext, snext, reset, or the enumerator's destructor.</p>
-   *
-   * <p>If the iterator is out of sync with its service, status is set
-   * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
-   *
-   * @param status the error code.
-   * @return a pointer to the string, or NULL.
-   *
-   * @draft ICU 2.4 
-   */
-  virtual const UnicodeString* snext(UErrorCode& status) = 0;
+    /**
+     * <p>Returns the next element as a NUL-terminated UChar*.  If there
+     * are no more elements, returns NULL.  If the resultLength pointer
+     * is not NULL, the length of the string (not counting the
+     * terminating NUL) is returned at that address.  If an error
+     * status is returned, the value at resultLength is undefined.</p>
+     *
+     * <p>The returned pointer is owned by this iterator and must not be
+     * deleted by the caller.  The pointer is valid until the next call
+     * to next, unext, snext, reset, or the enumerator's destructor.</p>
+     *
+     * <p>If the iterator is out of sync with its service, status is set
+     * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+     *
+     * Starting with ICU 2.8, the default implementation calls snext()
+     * and handles the conversion.
+     *
+     * @param status the error code.
+     * @param resultLength a pointer to receive the length, can be NULL.
+     * @return a pointer to the string, or NULL.
+     *
+     * @draft ICU 2.4 
+     */
+    virtual const UChar* unext(int32_t *resultLength, UErrorCode& status);
 
-  /**
-   * <p>Resets the iterator.  This re-establishes sync with the
-   * service and rewinds the iterator to start at the first
-   * element.</p>
-   *
-   * <p>Previous pointers returned by next, unext, or snext become
-   * invalid, and the value returned by count might change.</p>
-   *
-   * @param status the error code.
-   *
-   * @draft ICU 2.4 
-   */
-  virtual void reset(UErrorCode& status) = 0;
-};
+    /**
+     * <p>Returns the next element as a UnicodeString*.  If there are no
+     * more elements, returns NULL.</p>
+     *
+     * <p>The returned pointer is owned by this iterator and must not be
+     * deleted by the caller.  The pointer is valid until the next call
+     * to next, unext, snext, reset, or the enumerator's destructor.</p>
+     *
+     * <p>If the iterator is out of sync with its service, status is set
+     * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+     *
+     * @param status the error code.
+     * @return a pointer to the string, or NULL.
+     *
+     * @draft ICU 2.4 
+     */
+    virtual const UnicodeString* snext(UErrorCode& status) = 0;
 
-inline StringEnumeration::~StringEnumeration() {
-}
+    /**
+     * <p>Resets the iterator.  This re-establishes sync with the
+     * service and rewinds the iterator to start at the first
+     * element.</p>
+     *
+     * <p>Previous pointers returned by next, unext, or snext become
+     * invalid, and the value returned by count might change.</p>
+     *
+     * @param status the error code.
+     *
+     * @draft ICU 2.4 
+     */
+    virtual void reset(UErrorCode& status) = 0;
+
+protected:
+    /**
+     * UnicodeString field for use with default implementations and subclasses.
+     * @draft ICU 2.8
+     */
+    UnicodeString unistr;
+    /**
+     * char * default buffer for use with default implementations and subclasses.
+     * @draft ICU 2.8
+     */
+    char charsBuffer[32];
+    /**
+     * char * buffer for use with default implementations and subclasses.
+     * Allocated in constructor and in ensureCharsCapacity().
+     * @draft ICU 2.8
+     */
+    char *chars;
+    /**
+     * Capacity of chars, for use with default implementations and subclasses.
+     * @draft ICU 2.8
+     */
+    int32_t charsCapacity;
+
+    /**
+     * Default constructor for use with default implementations and subclasses.
+     * @draft ICU 2.8
+     */
+    StringEnumeration();
+
+    /**
+     * Ensures that chars is at least as large as the requested capacity.
+     * For use with default implementations and subclasses.
+     *
+     * @param capacity Requested capacity.
+     * @param status ICU in/out error code.
+     * @draft ICU 2.8
+     */
+    void ensureCharsCapacity(int32_t capacity, UErrorCode &status);
+
+    /**
+     * Converts s to Unicode and sets unistr to the result.
+     * For use with default implementations and subclasses,
+     * especially for implementations of snext() in terms of next().
+     * This is provided with a helper function instead of a default implementation
+     * of snext() to avoid potential infinite loops between next() and snext().
+     *
+     * For example:
+     * \code
+     * const UnicodeString* snext(UErrorCode& status) {
+     *   int32_t resultLength=0;
+     *   const char *s=next(&resultLength, status);
+     *   return setChars(s, resultLength, status);
+     * }
+     * \endcode
+     *
+     * @param s String to be converted to Unicode.
+     * @param length Length of the string.
+     * @param status ICU in/out error code.
+     * @return A pointer to unistr.
+     * @draft ICU 2.8
+     */
+    UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status);
+};
 
 U_NAMESPACE_END
 

Index: ubrk.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/ubrk.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- ubrk.h	10 Sep 2003 02:42:04 -0000	1.1
+++ ubrk.h	6 Apr 2004 10:08:07 -0000	1.2
@@ -7,6 +7,7 @@
 #define UBRK_H
 
 #include "unicode/utypes.h"
+#include "unicode/uloc.h"
 
 /**
  * A text-break iterator.
@@ -196,8 +197,9 @@
    * Title Case breaks 
    * The iterator created using this type locates title boundaries as described for 
    * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
-   * please use Word Boundary iterator.  @draft ICU 2.2
+   * please use Word Boundary iterator.
    *
+   * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
    */
   UBRK_TITLE
 } UBreakIteratorType;
@@ -214,7 +216,7 @@
  *  word, to allow for further subdivisions of a category in future releases.
  *  Applications should check for tag values falling within the range, rather
  *  than for single individual values.
- *  @draft ICU 2.2
+ *  @stable ICU 2.2
 */
 typedef enum UWordBreak {
     /** Tag value for "words" that do not fit into any of other categories. 
@@ -241,6 +243,54 @@
     UBRK_WORD_IDEO_LIMIT     = 500
 } UWordBreak;
 
+/**
+ *  Enum constants for the line break tags returned by getRuleStatus().
+ *  A range of values is defined for each category of
+ *  line break, to allow for further subdivisions of a category in future releases.
+ *  Applications should check for tag values falling within the range, rather
+ *  than for single individual values.
+ *  @draft ICU 2.8
+*/
+typedef enum ULineBreakTag {
+    /** Tag value for soft line breaks, positions at which a line break
+      *  is acceptable but not required                */
+    UBRK_LINE_SOFT            = 0,
+    /** Upper bound for soft line breaks.              */
+    UBRK_LINE_SOFT_LIMIT      = 100,
+    /** Tag value for a hard, or mandatory line break  */
+    UBRK_LINE_HARD            = 100,
+    /** Upper bound for hard line breaks.              */
+    UBRK_LINE_HARD_LIMIT      = 200
+} ULineBreakTag;
+
+
+
+/**
+ *  Enum constants for the sentence break tags returned by getRuleStatus().
+ *  A range of values is defined for each category of
+ *  sentence, to allow for further subdivisions of a category in future releases.
+ *  Applications should check for tag values falling within the range, rather
+ *  than for single individual values.
+ *  @draft ICU 2.8
+*/
+typedef enum USentenceBreakTag {
+    /** Tag value for sentences ending with a sentence terminator
+      * ('.', '?', '!', etc.) character, possibly followed by a
+      * hard separator (CR, LF, PS, etc.)
+      */
+    UBRK_SENTENCE_TERM       = 0,
+    /** Upper bound for tags for sentences ended by sentence terminators.    */
+    UBRK_SENTENCE_TERM_LIMIT = 100,
+    /** Tag value for sentences that do not contain an ending
+      * sentence terminator ('.', '?', '!', etc.) character, but 
+      * are ended only by a hard separator (CR, LF, PS, etc.) or end of input.
+      */
+    UBRK_SENTENCE_SEP        = 100,
+    /** Upper bound for tags for sentences ended by a separator.              */
+    UBRK_SENTENCE_SEP_LIMIT  = 200
+    /** Tag value for a hard, or mandatory line break  */
+} USentenceBreakTag;
+
 
 /**
  * Open a new UBreakIterator for locating text boundaries for a specified locale.
@@ -276,7 +326,7 @@
  * @param status A UErrorCode to receive any errors.
  * @return A UBreakIterator for the specified rules.
  * @see ubrk_open
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI UBreakIterator* U_EXPORT2
 ubrk_openRules(const UChar     *rules,
@@ -466,10 +516,23 @@
  * status, a default value of 0 is returned.
  * <p>
  * For word break iterators, the possible values are defined in enum UWordBreak.
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI  int32_t U_EXPORT2
 ubrk_getRuleStatus(UBreakIterator *bi);
+
+/**
+ * Return the locale of the break iterator. You can choose between the valid and
+ * the actual locale.
+ * @param bi break iterator
+ * @param type locale type (valid or actual)
+ * @param status error code
+ * @return locale string
+ * @draft ICU 2.8 likely to change in ICU 3.0, based on feedback
+ */
+U_CAPI const char* U_EXPORT2
+ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
+
 
 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
 

Index: uchar.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uchar.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- uchar.h	10 Sep 2003 02:42:04 -0000	1.5
+++ uchar.h	6 Apr 2004 10:08:07 -0000	1.6
@@ -290,45 +290,45 @@
     UCHAR_BINARY_LIMIT,
 
     /** Enumerated property Bidi_Class.
-        Same as u_charDirection, returns UCharDirection values. @draft ICU 2.2 */
+        Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
     UCHAR_BIDI_CLASS=0x1000,
-    /** First constant for enumerated/integer Unicode properties. @draft ICU 2.2 */
+    /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
     UCHAR_INT_START=UCHAR_BIDI_CLASS,
     /** Enumerated property Block.
-        Same as ublock_getCode, returns UBlockCode values. @draft ICU 2.2 */
+        Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */
     UCHAR_BLOCK,
     /** Enumerated property Canonical_Combining_Class.
-        Same as u_getCombiningClass, returns 8-bit numeric values. @draft ICU 2.2 */
+        Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */
     UCHAR_CANONICAL_COMBINING_CLASS,
     /** Enumerated property Decomposition_Type.
-        Returns UDecompositionType values. @draft ICU 2.2 */
+        Returns UDecompositionType values. @stable ICU 2.2 */
     UCHAR_DECOMPOSITION_TYPE,
     /** Enumerated property East_Asian_Width.
         See http://www.unicode.org/reports/tr11/
-        Returns UEastAsianWidth values. @draft ICU 2.2 */
+        Returns UEastAsianWidth values. @stable ICU 2.2 */
     UCHAR_EAST_ASIAN_WIDTH,
     /** Enumerated property General_Category.
-        Same as u_charType, returns UCharCategory values. @draft ICU 2.2 */
+        Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */
     UCHAR_GENERAL_CATEGORY,
     /** Enumerated property Joining_Group.
-        Returns UJoiningGroup values. @draft ICU 2.2 */
+        Returns UJoiningGroup values. @stable ICU 2.2 */
     UCHAR_JOINING_GROUP,
     /** Enumerated property Joining_Type.
-        Returns UJoiningType values. @draft ICU 2.2 */
+        Returns UJoiningType values. @stable ICU 2.2 */
     UCHAR_JOINING_TYPE,
     /** Enumerated property Line_Break.
-        Returns ULineBreak values. @draft ICU 2.2 */
+        Returns ULineBreak values. @stable ICU 2.2 */
     UCHAR_LINE_BREAK,
     /** Enumerated property Numeric_Type.
-        Returns UNumericType values. @draft ICU 2.2 */
+        Returns UNumericType values. @stable ICU 2.2 */
     UCHAR_NUMERIC_TYPE,
     /** Enumerated property Script.
-        Same as uscript_getScript, returns UScriptCode values. @draft ICU 2.2 */
+        Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */
     UCHAR_SCRIPT,
     /** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
         Returns UHangulSyllableType values. @draft ICU 2.6 */
     UCHAR_HANGUL_SYLLABLE_TYPE,
-    /** One more than the last constant for enumerated/integer Unicode properties. @draft ICU 2.2 */
+    /** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
     UCHAR_INT_LIMIT,
 
     /** Bitmask property General_Category_Mask.
@@ -953,33 +953,33 @@
 
     /* New blocks in Unicode 3.2 */
 
-    /** @draft ICU 2.2 */
+    /** @stable ICU 2.2 */
     UBLOCK_CYRILLIC_SUPPLEMENTARY = 97, /*[0500]*/
-    /** @draft ICU 2.2 */
+    /** @stable ICU 2.2 */
     UBLOCK_TAGALOG = 98, /*[1700]*/
-    /** @draft ICU 2.2 */
+    /** @stable ICU 2.2 */
     UBLOCK_HANUNOO = 99, /*[1720]*/
-    /** @draft ICU 2.2 */
+    /** @stable ICU 2.2 */
     UBLOCK_BUHID = 100, /*[1740]*/
-    /** @draft ICU 2.2 */
+    /** @stable ICU 2.2 */
     UBLOCK_TAGBANWA = 101, /*[1760]*/
-    /** @draft ICU 2.2 */
+    /** @stable ICU 2.2 */
     UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/
-    /** @draft ICU 2.2 */
+    /** @stable ICU 2.2 */
     UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/
-    /** @draft ICU 2.2 */
+    /** @stable ICU 2.2 */
     UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/
-    /** @draft ICU 2.2 */
+    /** @stable ICU 2.2 */
     UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/
-    /** @draft ICU 2.2 */
+    /** @stable ICU 2.2 */
     UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/
-    /** @draft ICU 2.2 */
+    /** @stable ICU 2.2 */
     UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/
-    /** @draft ICU 2.2 */
+    /** @stable ICU 2.2 */
     UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
-    /** @draft ICU 2.2 */
+    /** @stable ICU 2.2 */
     UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/
-    /** @draft ICU 2.2 */
+    /** @stable ICU 2.2 */
     UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/
 
     /* New blocks in Unicode 4 */
@@ -1030,7 +1030,7 @@
  *
  * @see UCHAR_EAST_ASIAN_WIDTH
  * @see u_getIntPropertyValue
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 typedef enum UEastAsianWidth {
     U_EA_NEUTRAL,   /*[N]*/ /*See note !!*/
@@ -1087,7 +1087,7 @@
  * Decomposition Type constants.
  *
  * @see UCHAR_DECOMPOSITION_TYPE
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 typedef enum UDecompositionType {
     U_DT_NONE,              /*[none]*/ /*See note !!*/
@@ -1115,7 +1115,7 @@
  * Joining Type constants.
  *
  * @see UCHAR_JOINING_TYPE
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 typedef enum UJoiningType {
     U_JT_NON_JOINING,       /*[U]*/ /*See note !!*/
@@ -1131,7 +1131,7 @@
  * Joining Group constants.
  *
  * @see UCHAR_JOINING_GROUP
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 typedef enum UJoiningGroup {
     U_JG_NO_JOINING_GROUP,
@@ -1195,7 +1195,7 @@
  * Line Break constants.
  *
  * @see UCHAR_LINE_BREAK
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 typedef enum ULineBreak {
     U_LB_UNKNOWN,           /*[XX]*/ /*See note !!*/
@@ -1236,7 +1236,7 @@
  * Numeric Type constants.
  *
  * @see UCHAR_NUMERIC_TYPE
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 typedef enum UNumericType {
     U_NT_NONE,              /*[None]*/ /*See note !!*/
@@ -1392,7 +1392,7 @@
  * @see u_getIntPropertyMinValue
  * @see u_getIntPropertyMaxValue
  * @see u_getUnicodeVersion
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI int32_t U_EXPORT2
 u_getIntPropertyValue(UChar32 c, UProperty which);
@@ -1413,7 +1413,7 @@
  * @see u_getUnicodeVersion
  * @see u_getIntPropertyMaxValue
  * @see u_getIntPropertyValue
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI int32_t U_EXPORT2
 u_getIntPropertyMinValue(UProperty which);
@@ -1442,7 +1442,7 @@
  * @see u_getUnicodeVersion
  * @see u_getIntPropertyMaxValue
  * @see u_getIntPropertyValue
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI int32_t U_EXPORT2
 u_getIntPropertyMaxValue(UProperty which);
@@ -1465,7 +1465,7 @@
  * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined.
  *
  * @see U_NO_NUMERIC_VALUE
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI double U_EXPORT2
 u_getNumericValue(UChar32 c);
@@ -1475,7 +1475,7 @@
  * no numeric value is defined for a code point.
  *
  * @see u_getNumericValue
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 #define U_NO_NUMERIC_VALUE ((double)-123456789.)
 
@@ -2118,7 +2118,7 @@
  *         length of the name.
  *         The length does not include the zero-termination.
  *
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI int32_t U_EXPORT2
 u_getISOComment(UChar32 c,
@@ -2681,7 +2681,7 @@
  *         length of the name.
  *         The length does not include the zero-termination.
  *
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI int32_t U_EXPORT2
 u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);

Index: uchriter.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uchriter.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- uchriter.h	10 Sep 2003 02:42:04 -0000	1.4
+++ uchriter.h	6 Apr 2004 10:08:07 -0000	1.5
@@ -345,18 +345,18 @@
   virtual void            getText(UnicodeString& result);
 
   /**
-   * Return a class ID for this object (not really public) 
-   * @return a class ID for this object.  
+   * Return a class ID for this class (not really public) 
+   * @return a class ID for this class  
    * @stable ICU 2.0
    */
-  virtual UClassID         getDynamicClassID(void) const;
+  static UClassID         getStaticClassID(void);
 
   /**
-   * Return a class ID for this class (not really public) 
-   * @return a class ID for this class  
+   * Return a class ID for this object (not really public) 
+   * @return a class ID for this object.  
    * @stable ICU 2.0
    */
-  static inline UClassID   getStaticClassID(void);
+  virtual UClassID        getDynamicClassID(void) const;
 
 protected:
   /**
@@ -370,17 +370,7 @@
    */      
   const UChar*            text;
 
-private:
-  static const char       fgClassID;
 };
-
-inline UClassID
-UCharCharacterIterator::getStaticClassID(void) 
-{ return (UClassID)(&fgClassID); }
-
-inline UClassID
-UCharCharacterIterator::getDynamicClassID(void) const 
-{ return UCharCharacterIterator::getStaticClassID(); }
 
 U_NAMESPACE_END
 #endif

Index: uclean.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uclean.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- uclean.h	10 Sep 2003 02:42:04 -0000	1.4
+++ uclean.h	6 Apr 2004 10:08:07 -0000	1.5
@@ -33,6 +33,10 @@
  *  application must guarantee that the first call to u_init() happen
  *  without contention, in a single thread only.
  *  <p>
+ *  If <code>u_setMemoryFunctions()</code> or 
+ *  <code>u_setMutexFunctions</code> are needed (uncommon), they must be
+ *  called _before_ <code>u_init()</code>.
+ *  <p>
  *  Extra, repeated, or otherwise unneeded calls to u_init() do no harm,
  *  other than taking a small amount of time.
  *
@@ -85,5 +89,153 @@
  */
 U_CAPI void U_EXPORT2 
 u_cleanup(void);
+
+
+
+
+/**
+  * An opaque pointer type that represents an ICU mutex.
+  * For user-implemented mutexes, the value will typically point to a
+  *  struct or object that implements the mutex.
+  * @draft ICU 2.8
+  * @system
+  */
+typedef void *UMTX;
+
+/**
+  *  Function Pointer type for a user supplied mutex initialization function.
+  *  The user-supplied function will be called by ICU whenever ICU needs to create a
+  *  new mutex.  The function implementation should create a mutex, and store a pointer
+  *  to something that uniquely identifies the mutex into the UMTX that is supplied
+  *  as a parameter.
+  *  @param context user supplied value, obtained from u_setMutexFunctions().
+  *  @param mutex   Receives a pointer that identifies the new mutex.
+  *                 The mutex init function must set the UMTX to a non-null value.   
+  *                 Subsequent calls by ICU to lock, unlock, or destroy a mutex will 
+  *                 identify the mutex by the UMTX value.
+  *  @param status  Error status.  Report errors back to ICU by setting this variable
+  *                 with an error code.
+  *  @draft ICU 2.8
+  *  @system
+  */
+typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX  *mutex, UErrorCode* status);
+
+
+/**
+  *  Function Pointer type for user supplied mutex functions.
+  *  One of the  user-supplied functions with this signature will be called by ICU
+  *  whenever ICU needs to lock, unlock, or destroy a mutex.
+  *  @param context user supplied value, obtained from u_setMutexFunctions().
+  *  @param mutex   specify the mutex on which to operate.
+  *  @draft ICU 2.8
+  *  @system
+  */
+typedef void U_CALLCONV UMtxFn   (const void *context, UMTX  *mutex);
+
+
+/**
+  *  Set the functions that ICU will use for mutex operations
+  *  Use of this function is optional; by default (without this function), ICU will
+  *  directly access system functions for mutex operations
+  *  This function can only be used when ICU is in an initial, unused state, before
+  *  u_init() has been called.
+  *  This function may be used even when ICU has been built without multi-threaded
+  *  support  (see ICU_USE_THREADS pre-processor variable, umutex.h)
+  *  @param context This pointer value will be saved, and then (later) passed as
+  *                 a parameter to the user-supplied mutex functions each time they
+  *                 are called. 
+  *  @param init    Pointer to a mutex initialization function.  Must be non-null.
+  *  @param destroy Pointer to the mutex destroy function.  Must be non-null.
+  *  @param lock    pointer to the mutex lock function.  Must be non-null.
+  *  @param unlock  Pointer to the mutex unlock function.  Must be non-null.
+  *  @param status  Receives error values.
+  *  @draft ICU 2.8
+  *  @system
+  */  
+U_CAPI void U_EXPORT2 
+u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtxFn *lock, UMtxFn *unlock,
+                    UErrorCode *status);
+
+
+/**
+  *  Pointer type for a user supplied atomic increment or decrement function.
+  *  @param context user supplied value, obtained from u_setAtomicIncDecFunctions().
+  *  @param p   Pointer to a 32 bit int to be incremented or decremented
+  *  @return    The value of the variable after the inc or dec operation.
+  *  @draft ICU 2.8
+  *  @system
+  */
+typedef int32_t U_CALLCONV UMtxAtomicFn(const void *context, int32_t *p);
+
+/**
+ *  Set the functions that ICU will use for atomic increment and decrement of int32_t values.
+ *  Use of this function is optional; by default (without this function), ICU will
+ *  use its own internal implementation of atomic increment/decrement.
+ *  This function can only be used when ICU is in an initial, unused state, before
+ *  u_init() has been called.
+ *  @param context This pointer value will be saved, and then (later) passed as
+ *                 a parameter to the increment and decrement functions each time they
+ *                 are called.  This function can only be called 
+ *  @param inc     Pointer to a function to do an atomic increment operation.  Must be non-null.
+ *  @param dec     Pointer to a function to do an atomic decrement operation.  Must be non-null.
+ *  @param status  Receives error values.
+ *  @draft ICU 2.8
+ *  @system
+ */  
+U_CAPI void U_EXPORT2 
+u_setAtomicIncDecFunctions(const void *context, UMtxAtomicFn *inc, UMtxAtomicFn *dec,
+                    UErrorCode *status);
+
+
+
+/**
+  *  Pointer type for a user supplied memory allocation function.
+  *  @param context user supplied value, obtained from u_setMemoryFunctions().
+  *  @param size    The number of bytes to be allocated
+  *  @return        Pointer to the newly allocated memory, or NULL if the allocation failed.
+  *  @draft ICU 2.8
+  *  @system
+  */
+typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size);
+/**
+  *  Pointer type for a user supplied memory re-allocation function.
+  *  @param context user supplied value, obtained from u_setMemoryFunctions().
+  *  @param mem     Pointer to the memory block to be resized
+  *  @param size    The new size for the block
+  *  @return        Pointer to the resized memory block, or NULL if the resizing failed.
+  *  @draft ICU 2.8
+  *  @system
+  */
+typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t size);
+/**
+  *  Pointer type for a user supplied memory free  function.  Behavior should be
+  *  similar to the standard C library free().
+  *  @param context user supplied value, obtained from u_setMemoryFunctions().
+  *  @param mem     Pointer to the memory block to be freed.
+  *                 This function does not return a value.
+  *  @draft ICU 2.8
+  *  @system
+  */
+typedef void  U_CALLCONV UMemFreeFn (const void *context, void *mem);
+
+/**
+ *  Set the functions that ICU will use for memory allocation.
+ *  Use of this function is optional; by default (without this function), ICU will
+ *  use the standard C library malloc() and free() functions.
+ *  This function can only be used when ICU is in an initial, unused state, before
+ *  u_init() has been called.
+ *  @param context This pointer value will be saved, and then (later) passed as
+ *                 a parameter to the memory functions each time they
+ *                 are called.
+ *  @param a       Pointer to a user-supplied malloc function.
+ *  @param r       Pointer to a user-supplied realloc function.
+ *  @param f       Pointer to a user-supplied free function.
+ *  @param status  Receives error values.
+ *  @draft ICU 2.8
+ *  @system
+ */  
+U_CAPI void U_EXPORT2 
+u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, 
+                    UErrorCode *status);
 
 #endif

Index: ucnv.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/ucnv.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- ucnv.h	10 Sep 2003 02:42:04 -0000	1.4
+++ ucnv.h	6 Apr 2004 10:08:07 -0000	1.5
@@ -414,6 +414,9 @@
  * stored in the converter cache or the alias table. The only way to open further converters
  * is call this function multiple times, or use the ucnv_safeClone() function to clone a 
  * 'master' converter.</p>
+ *
+ * <p>A future version of ICU may add alias table lookups and/or caching
+ * to this function.</p>
  * 
  * <p>Example Use:
  *      <code>cnv = ucnv_openPackage("myapp", "myconverter", &err);</code>
@@ -427,7 +430,7 @@
  * @see ucnv_open
  * @see ucnv_safeClone
  * @see ucnv_close
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI UConverter* U_EXPORT2 
 ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err);
@@ -454,8 +457,13 @@
                int32_t          *pBufferSize, 
                UErrorCode       *status);
 
-/** @stable ICU 2.0 */
-#define U_CNV_SAFECLONE_BUFFERSIZE  3072
+/**
+ * \def U_CNV_SAFECLONE_BUFFERSIZE
+ * Definition of a buffer size that is designed to be large enough for
+ * converters to be cloned with ucnv_safeClone().
+ * @stable ICU 2.0
+ */
+#define U_CNV_SAFECLONE_BUFFERSIZE  1024
 
 /**
  * Deletes the unicode converter and releases resources associated
@@ -580,10 +588,50 @@
 ucnv_resetFromUnicode(UConverter *converter);
 
 /**
- * Returns the maximum length of bytes used by a character. This varies 
- * between 1 and 4
- * @param converter the Unicode converter
- * @return the maximum number of bytes allowed by this particular converter
+ * Returns the maximum number of bytes that are output per UChar in conversion
+ * from Unicode using this converter.
+ * The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING
+ * to calculate the size of a target buffer for conversion from Unicode.
+ *
+ * Note: Before ICU 2.8, this function did not return reliable numbers for
+ * some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS.
+ *
+ * This number may not be the same as the maximum number of bytes per
+ * "conversion unit". In other words, it may not be the intuitively expected
+ * number of bytes per character that would be published for a charset,
+ * and may not fulfill any other purpose than the allocation of an output
+ * buffer of guaranteed sufficient size for a given input length and converter.
+ *
+ * Examples for special cases that are taken into account:
+ * - Supplementary code points may convert to more bytes than BMP code points.
+ *   This function returns bytes per UChar (UTF-16 code unit), not per
+ *   Unicode code point, for efficient buffer allocation.
+ * - State-shifting output (SI/SO, escapes, etc.) from stateful converters.
+ * - When m input UChars are converted to n output bytes, then the maximum n/m
+ *   is taken into account.
+ *
+ * The number returned here does not take into account
+ * (see UCNV_GET_MAX_BYTES_FOR_STRING):
+ * - callbacks which output more than one charset character sequence per call,
+ *   like escape callbacks
+ * - initial and final non-character bytes that are output by some converters
+ *   (automatic BOMs, initial escape sequence, final SI, etc.)
+ *
+ * Examples for returned values:
+ * - SBCS charsets: 1
+ * - Shift-JIS: 2
+ * - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted)
+ * - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_)
+ * - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS)
+ * - ISO-2022: 3 (always outputs UTF-8)
+ * - ISO-2022-JP: 6 (4-byte escape sequences + DBCS)
+ * - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS)
+ *
+ * @param converter The Unicode converter.
+ * @return The maximum number of bytes per UChar that are output by ucnv_fromUnicode(),
+ *         to be used together with UCNV_GET_MAX_BYTES_FOR_STRING for buffer allocation.
+ *
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
  * @see ucnv_getMinCharSize
  * @stable ICU 2.0
  */
@@ -591,8 +639,30 @@
 ucnv_getMaxCharSize(const UConverter *converter);
 
 /**
+ * Calculates the size of a buffer for conversion from Unicode to a charset.
+ * The calculated size is guaranteed to be sufficient for this conversion.
+ *
+ * It takes into account initial and final non-character bytes that are output
+ * by some converters.
+ * It does not take into account callbacks which output more than one charset
+ * character sequence per call, like escape callbacks.
+ * The default (substitution) callback only outputs one charset character sequence.
+ *
+ * @param length Number of UChars to be converted.
+ * @param maxCharSize Return value from ucnv_getMaxCharSize() for the converter
+ *                    that will be used.
+ * @return Size of a buffer that will be large enough to hold the output bytes of
+ *         converting length UChars with the converter that returned the maxCharSize.
+ *
+ * @see ucnv_getMaxCharSize
+ * @draft ICU 2.8
+ */
+#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \
+     (((int32_t)(length)+10)*(int32_t)(maxCharSize))
+
+/**
  * Returns the minimum byte length for characters in this codepage. 
- * This is either 1 or 2 for all supported codepages.
+ * This is usually either 1 or 2.
  * @param converter the Unicode converter
  * @return the minimum number of bytes allowed by this particular converter
  * @see ucnv_getMaxCharSize
@@ -856,6 +926,12 @@
  *  consumed. At that point, the caller should reset the source and
  *  sourceLimit pointers to point to the next chunk.
  * 
+ * At the end of the stream (flush==TRUE), the input is completely consumed
+ * when *source==sourceLimit and no error code is set.
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
+ * 
  * This is a <I>stateful</I> conversion. Additionally, even when all source data has
  * been consumed, some data may be in the converters' internal state.
  * Call this function repeatedly, updating the target pointers with
@@ -918,6 +994,12 @@
  * returned, it means that all of the source buffer has been
  *  consumed. At that point, the caller should reset the source and
  *  sourceLimit pointers to point to the next chunk.
+ *
+ * At the end of the stream (flush==TRUE), the input is completely consumed
+ * when *source==sourceLimit and no error code is set.
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
  * 
  * This is a <I>stateful</I> conversion. Additionally, even when all source data has
  * been consumed, some data may be in the converters' internal state.
@@ -970,7 +1052,7 @@
  * It is only useful for whole strings, not for streaming conversion.
  *
  * The maximum output buffer capacity required (barring output from callbacks) will be
- * srcLength*ucnv_getMaxCharSize(cnv).
+ * UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)).
  *
  * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called)
  * @param src the input Unicode string
@@ -986,6 +1068,7 @@
  *         and a buffer of the indicated length would need to be passed in
  * @see ucnv_fromUnicode
  * @see ucnv_convert
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
  * @stable ICU 2.0
  */
 U_CAPI int32_t U_EXPORT2
@@ -1027,11 +1110,39 @@
               UErrorCode *pErrorCode);
 
 /**
- * Will convert a codepage buffer into unicode one character at a time.
- * <p>This function was written to be efficient when transcoding small
- * amounts of data at a time.
- * In that case it will be more efficient than \Ref{ucnv_toUnicode}.
- * When converting large buffers use \Ref{ucnv_toUnicode}.</p>
+ * Convert a codepage buffer into Unicode one character at a time.
+ * The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set.
+ *
+ * Advantage compared to ucnv_toUnicode() or ucnv_toUChars():
+ * - Faster for small amounts of data, for most converters, e.g.,
+ *   US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets.
+ *   (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants,
+ *    it uses ucnv_toUnicode() internally.)
+ * - Convenient.
+ *
+ * Limitations compared to ucnv_toUnicode():
+ * - Always assumes flush=TRUE.
+ *   This makes ucnv_getNextUChar() unsuitable for "streaming" conversion,
+ *   that is, for where the input is supplied in multiple buffers,
+ *   because ucnv_getNextUChar() will assume the end of the input at the end
+ *   of the first buffer.
+ * - Does not provide offset output.
+ *
+ * It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because
+ * ucnv_getNextUChar() uses the current state of the converter
+ * (unlike ucnv_toUChars() which always resets first).
+ * However, if ucnv_getNextUChar() is called after ucnv_toUnicode()
+ * stopped in the middle of a character sequence (with flush=FALSE),
+ * then ucnv_getNextUChar() will always use the slower ucnv_toUnicode()
+ * internally until the next character boundary.
+ * (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to
+ * start at a character boundary.)
+ *
+ * Instead of using ucnv_getNextUChar(), it is recommended
+ * to convert using ucnv_toUnicode() or ucnv_toUChars()
+ * and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h)
+ * or a C++ CharacterIterator or similar.
+ * This allows streaming conversion and offset output, for example.
  *
  * <p>Handling of surrogate pairs and supplementary-plane code points:<br>
  * There are two different kinds of codepages that provide mappings for surrogate characters:
@@ -1492,7 +1603,7 @@
  * @see ucnv_getStandardName
  * @see uenum_close
  * @see uenum_next
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI UEnumeration * U_EXPORT2
 ucnv_openStandardNames(const char *convName,

Index: ucnv_err.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/ucnv_err.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- ucnv_err.h	10 Sep 2003 02:42:04 -0000	1.4
+++ ucnv_err.h	6 Apr 2004 10:08:08 -0000	1.5
@@ -172,7 +172,7 @@
                               by the new converter, the callback must clone 
                               the data and call ucnv_setFromUCallback 
                               (or setToUCallback) with the correct pointer.
-                              @draft ICU 2.2
+                              @stable ICU 2.2
                            */
 } UConverterCallbackReason;
 

Index: uenum.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uenum.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- uenum.h	10 Sep 2003 02:42:04 -0000	1.1
+++ uenum.h	6 Apr 2004 10:08:08 -0000	1.2
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2002, International Business Machines
+*   Copyright (C) 2002-2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@@ -22,10 +22,10 @@
 /**
  * An enumeration object.
  * For usage in C programs.
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 struct UEnumeration;
-/** structure representing an enumeration object instance @draft ICU 2.2 */
+/** structure representing an enumeration object instance @stable ICU 2.2 */
 typedef struct UEnumeration UEnumeration;
 
 /**
@@ -33,7 +33,7 @@
  * does nothing.  After this call, any char* or UChar* pointer
  * returned by uenum_unext() or uenum_next() is invalid.
  * @param en UEnumeration structure pointer
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI void U_EXPORT2
 uenum_close(UEnumeration* en);
@@ -50,7 +50,7 @@
  * @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the
  *               iterator is out of sync.
  * @return number of elements in the iterator
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI int32_t U_EXPORT2
 uenum_count(UEnumeration* en, UErrorCode* status);
@@ -74,7 +74,7 @@
  *         until the next call to any uenum_... method, including
  *         uenum_next() or uenum_unext().  When all strings have been
  *         traversed, returns NULL.
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI const UChar* U_EXPORT2
 uenum_unext(UEnumeration* en,
@@ -107,7 +107,7 @@
  *         until the next call to any uenum_... method, including
  *         uenum_next() or uenum_unext().  When all strings have been
  *         traversed, returns NULL.
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI const char* U_EXPORT2
 uenum_next(UEnumeration* en,
@@ -121,7 +121,7 @@
  * @param en the iterator object
  * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
  *               the iterator is out of sync with its service.  
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI void U_EXPORT2
 uenum_reset(UEnumeration* en, UErrorCode* status);

Index: uidna.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uidna.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- uidna.h	10 Sep 2003 02:42:04 -0000	1.1
+++ uidna.h	6 Apr 2004 10:08:08 -0000	1.2
@@ -44,7 +44,7 @@
  * once.
  * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string) 
  * ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string).
- *\end_file
+ *
  */
 
 /** 
@@ -84,18 +84,18 @@
  * @param destCapacity      Size of dest.
  * @param options           A bit set of options:
  *
- *  - UIDNA_DEFAULT         Use default options, i.e., do not process unassigned code points
- *                          and do not use STD3 ASCII rules
- *                          If unassigned code points are found the operation fails with 
- *                          U_UNASSIGNED_CODE_POINT_FOUND error code.
+ *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
+ *                              and do not use STD3 ASCII rules
+ *                              If unassigned code points are found the operation fails with 
+ *                              U_UNASSIGNED_ERROR error code.
  *
- *  - UIDNA_UNASSIGNED      Unassigned values can be converted to ASCII for query operations
- *                          If this option is set, the unassigned code points are in the input 
- *                          are treated as normal Unicode code points.
+ *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
+ *                              If this option is set, the unassigned code points in the input 
+ *                              are treated as normal Unicode code points.
  *                          
- *  - UIDNA_USE_STD3_RULES  Use STD3 ASCII rules for host name syntax restrictions
- *                          If this option is set and the input does not satisfy STD3 rules,  
- *                          the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
+ *                              If this option is set and the input does not satisfy STD3 rules,  
+ *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  *
  * @param parseError        Pointer to UParseError struct to receive information on position 
  *                          of error if an error is encountered. Can be NULL.
@@ -129,23 +129,23 @@
  * @param destCapacity      Size of dest.
  * @param options           A bit set of options:
  *  
- *  - UIDNA_DEFAULT         Use default options, i.e., do not process unassigned code points
- *                          and do not use STD3 ASCII rules
- *                          If unassigned code points are found the operation fails with 
- *                          U_UNASSIGNED_CODE_POINT_FOUND error code.
+ *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
+ *                              and do not use STD3 ASCII rules
+ *                              If unassigned code points are found the operation fails with 
+ *                              U_UNASSIGNED_ERROR error code.
  *
- *  - UIDNA_UNASSIGNED      Unassigned values can be converted to ASCII for query operations
- *                          If this option is set, the unassigned code points are in the input 
- *                          are treated as normal Unicode code points. <b> Note: </b> This option is 
- *                          required on toUnicode operation because the RFC mandates 
- *                          verification of decoded ACE input by applying toASCII and comparing
- *                          its output with source
+ *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
+ *                              If this option is set, the unassigned code points in the input 
+ *                              are treated as normal Unicode code points. <b> Note: </b> This option is 
+ *                              required on toUnicode operation because the RFC mandates 
+ *                              verification of decoded ACE input by applying toASCII and comparing
+ *                              its output with source
  *
  *                          
  *                          
- *  - UIDNA_USE_STD3_RULES  Use STD3 ASCII rules for host name syntax restrictions
- *                          If this option is set and the input does not satisfy STD3 rules,  
- *                          the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
+ *                              If this option is set and the input does not satisfy STD3 rules,  
+ *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  *
  * @param parseError        Pointer to UParseError struct to receive information on position 
  *                          of error if an error is encountered. Can be NULL.
@@ -184,18 +184,18 @@
  * @param destCapacity      Size of dest.
  * @param options           A bit set of options:
  *  
- *  - UIDNA_DEFAULT         Use default options, i.e., do not process unassigned code points
- *                          and do not use STD3 ASCII rules
- *                          If unassigned code points are found the operation fails with 
- *                          U_UNASSIGNED_CODE_POINT_FOUND error code.
+ *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
+ *                              and do not use STD3 ASCII rules
+ *                              If unassigned code points are found the operation fails with 
+ *                              U_UNASSIGNED_ERROR error code.
  *
- *  - UIDNA_UNASSIGNED      Unassigned values can be converted to ASCII for query operations
- *                          If this option is set, the unassigned code points are in the input 
- *                          are treated as normal Unicode code points.
+ *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
+ *                              If this option is set, the unassigned code points in the input 
+ *                              are treated as normal Unicode code points.
  *                          
- *  - UIDNA_USE_STD3_RULES  Use STD3 ASCII rules for host name syntax restrictions
- *                          If this option is set and the input does not satisfy STD3 rules,  
- *                          the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
+ *                              If this option is set and the input does not satisfy STD3 rules,  
+ *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  * 
  * @param parseError        Pointer to UParseError struct to receive information on position 
  *                          of error if an error is encountered. Can be NULL.
@@ -230,18 +230,18 @@
  * @param destCapacity      Size of dest.
  * @param options           A bit set of options:
  *  
- *  - UIDNA_DEFAULT         Use default options, i.e., do not process unassigned code points
- *                          and do not use STD3 ASCII rules
- *                          If unassigned code points are found the operation fails with 
- *                          U_UNASSIGNED_CODE_POINT_FOUND error code.
+ *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
+ *                              and do not use STD3 ASCII rules
+ *                              If unassigned code points are found the operation fails with 
+ *                              U_UNASSIGNED_ERROR error code.
  *
- *  - UIDNA_UNASSIGNED      Unassigned values can be converted to ASCII for query operations
- *                          If this option is set, the unassigned code points are in the input 
- *                          are treated as normal Unicode code points.
+ *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
+ *                              If this option is set, the unassigned code points in the input 
+ *                              are treated as normal Unicode code points.
  *                          
- *  - UIDNA_USE_STD3_RULES  Use STD3 ASCII rules for host name syntax restrictions
- *                          If this option is set and the input does not satisfy STD3 rules,  
- *                          the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
+ *                              If this option is set and the input does not satisfy STD3 rules,  
+ *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  *
  * @param parseError        Pointer to UParseError struct to receive information on position 
  *                          of error if an error is encountered. Can be NULL.
@@ -262,7 +262,7 @@
                      UErrorCode* status);
 
 /**
- * Compare two strings for IDNs for equivalence.
+ * Compare two IDN strings for equivalence.
  * This function splits the domain names into labels and compares them.
  * According to IDN RFC, whenever two labels are compared, they are 
  * considered equal if and only if their ASCII forms (obtained by 
@@ -277,18 +277,18 @@
  * @param length2           Length of second source string, or -1 if NUL-terminated.
  * @param options           A bit set of options:
  *  
- *  - UIDNA_DEFAULT         Use default options, i.e., do not process unassigned code points
- *                          and do not use STD3 ASCII rules
- *                          If unassigned code points are found the operation fails with 
- *                          U_UNASSIGNED_CODE_POINT_FOUND error code.
+ *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
+ *                              and do not use STD3 ASCII rules
+ *                              If unassigned code points are found the operation fails with 
+ *                              U_UNASSIGNED_ERROR error code.
  *
- *  - UIDNA_UNASSIGNED      Unassigned values can be converted to ASCII for query operations
- *                          If this option is set, the unassigned code points are in the input 
- *                          are treated as normal Unicode code points.
+ *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
+ *                              If this option is set, the unassigned code points in the input 
+ *                              are treated as normal Unicode code points.
  *                          
- *  - UIDNA_USE_STD3_RULES  Use STD3 ASCII rules for host name syntax restrictions
- *                          If this option is set and the input does not satisfy STD3 rules,  
- *                          the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
+ *                              If this option is set and the input does not satisfy STD3 rules,  
+ *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  *
  * @param status            ICU error code in/out parameter.
  *                          Must fulfill U_SUCCESS before the function call.

Index: uiter.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uiter.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- uiter.h	10 Sep 2003 02:42:04 -0000	1.1
+++ uiter.h	6 Apr 2004 10:08:08 -0000	1.2
@@ -242,6 +242,10 @@
  * to save and restore the iterator position more efficiently than with
  * getIndex()/move().
  *
+ * The iterator state is defined as a uint32_t value because it is designed
+ * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
+ * of the character iterator.
+ *
  * With some UCharIterator implementations (e.g., UTF-8),
  * getting and setting the UTF-16 index with existing functions
  * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
@@ -316,6 +320,15 @@
  * Implementations of such C APIs are "callers" of UCharIterator functions;
  * they only use the "public" function pointers and never access the "protected"
  * fields directly.
+ *
+ * The current() and next() functions only check the current index against the
+ * limit, and previous() only checks the current index against the start,
+ * to see if the iterator already reached the end of the iteration range.
+ *
+ * The assumption - in all iterators - is that the index is moved via the API,
+ * which means it won't go out of bounds, or the index is modified by
+ * user code that knows enough about the iterator implementation to set valid
+ * index values.
  *
  * UCharIterator functions return code unit values 0..0xffff,
  * or U_SENTINEL if the iteration bounds are reached.

Index: uloc.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uloc.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- uloc.h	10 Sep 2003 02:42:04 -0000	1.4
+++ uloc.h	6 Apr 2004 10:08:08 -0000	1.5
@@ -22,6 +22,7 @@
 #define ULOC_H
 
 #include "unicode/utypes.h"
+#include "unicode/uenum.h"
 
 /**    
  * \file
@@ -247,6 +248,12 @@
  */
 #define ULOC_LANG_CAPACITY 12
 /**
+ * Useful constant for the maximum size of the script part of a locale ID
+ * (including the terminating NULL).
+ * @draft ICU 2.8
+ */
+#define ULOC_SCRIPT_CAPACITY 6
+/**
  * Useful constant for the maximum size of the country part of a locale ID
  * (including the terminating NULL).
  * @stable ICU 2.0
@@ -257,8 +264,17 @@
  * (including the terminating NULL).
  * @stable ICU 2.0
  */
-#define ULOC_FULLNAME_CAPACITY 50
-
+#define ULOC_FULLNAME_CAPACITY 56
+/**
+ * Useful constant for the maximum size of keywords in a locale
+ * @draft ICU 2.8
+ */
+#define ULOC_KEYWORDS_CAPACITY 50
+/**
+ * Useful constant for the maximum size of keywords and their values in a locale
+ * @draft ICU 2.8
+ */
+#define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
 
 /**
  * Constants for *_getLocale()
@@ -275,21 +291,29 @@
  * @stable ICU 2.1
  */
 typedef enum {
-  /** This is locale the data actually comes from */
+  /** This is the locale the data actually comes from 
+   * @stable ICU 2.1
+   */
   ULOC_ACTUAL_LOCALE    = 0,
-  /** This is the most specific locale supported by ICU */
+  /** This is the most specific locale supported by ICU 
+   * @stable ICU 2.1
+   */
   ULOC_VALID_LOCALE    = 1,
-  /** This is the requested locale */
+  /** This is the requested locale
+   *  @deprecated ICU 2.8 
+   */
   ULOC_REQUESTED_LOCALE = 2,
   ULOC_DATA_LOCALE_TYPE_LIMIT
 } ULocDataLocaleType ;
 
 
 /**
- * Gets ICU's default locale.  This pointer and/or the contents of the pointer may
- * become invalid if the uloc_setDefault() is called, so copy the contents of the
- * pointer before calling uloc_setDefault().
- *
+ * Gets ICU's default locale.  
+ * The returned string is a snapshot in time, and will remain valid
+ *   and unchanged even when uloc_setDefault() is called.
+ *   The returned storage is owned by ICU, and must not be altered or deleted
+ *   by the caller.
+ *  
  * @return the ICU default locale
  * @system
  * @stable ICU 2.0
@@ -298,7 +322,15 @@
 uloc_getDefault(void);
 
 /**
- * Sets ICU's default locale.  Call this once during setup or program initialization.  
+ * Sets ICU's default locale.  
+ *    By default (without calling this function), ICU's default locale will be based
+ *    on information obtained from the underlying system environment.
+ *    <p>
+ *    Changes to ICU's default locale do not propagate back to the
+ *    system environment.
+ *    <p>
+ *    Changes to ICU's default locale do not affect any ICU services that
+ *    may already be open based on the previous default locale value.
  *
  * @param localeID the new ICU default locale. A value of NULL will try to get
  *                 the system's default locale.
@@ -313,12 +345,12 @@
 /**
  * Gets the language code for the specified locale.
  *
- * @param localeID the locale to get the ISO langauge code with
- * @param language the langauge code for localeID
+ * @param localeID the locale to get the ISO language code with
+ * @param language the language code for localeID
  * @param languageCapacity the size of the language buffer to store the  
  * language code with
- * @param err error information if retrieving the  language code failed
- * @return the actual buffer size needed for the  langauge code.  If it's greater 
+ * @param err error information if retrieving the language code failed
+ * @return the actual buffer size needed for the language code.  If it's greater 
  * than languageCapacity, the returned language code will be truncated.  
  * @stable ICU 2.0
  */
@@ -329,6 +361,24 @@
          UErrorCode* err);
 
 /**
+ * Gets the script code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO script code with
+ * @param script the script code for localeID
+ * @param scriptCapacity the size of the script buffer to store the  
+ * script code with
+ * @param err error information if retrieving the script code failed
+ * @return the actual buffer size needed for the script code.  If it's greater 
+ * than scriptCapacity, the returned script code will be truncated.  
+ * @draft ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getScript(const char*    localeID,
+         char* script,
+         int32_t scriptCapacity,
+         UErrorCode* err);
+
+/**
  * Gets the  country code for the specified locale.
  *
  * @param localeID the locale to get the country code with
@@ -363,6 +413,30 @@
         char* variant,
         int32_t variantCapacity,
         UErrorCode* err);
+
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the ICU locale ID to
+ * a certain extent. Upper and lower case are set as needed.
+ * It does NOT map aliased names in any way.
+ * See the top of this header file.
+ * This API supports preflighting.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name fill in buffer for the name without keywords.
+ * @param nameCapacity capacity of the fill in buffer.
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name.  If it's greater 
+ * than nameCapacity, the returned full name will be truncated.  
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getName(const char*    localeID,
+         char* name,
+         int32_t nameCapacity,
+         UErrorCode* err);
+
 /**
  * Gets the full name for the specified locale.
  * Note: This has the effect of 'canonicalizing' the string to
@@ -381,7 +455,7 @@
  * @stable ICU 2.0
  */
 U_CAPI int32_t U_EXPORT2
-uloc_getName(const char*    localeID,
+uloc_canonicalize(const char*    localeID,
          char* name,
          int32_t nameCapacity,
          UErrorCode* err);
@@ -389,8 +463,8 @@
 /**
  * Gets the ISO language code for the specified locale.
  *
- * @param localeID the locale to get the ISO langauge code with
- * @return language the ISO langauge code for localeID
+ * @param localeID the locale to get the ISO language code with
+ * @return language the ISO language code for localeID
  * @stable ICU 2.0
  */
 U_CAPI const char* U_EXPORT2
@@ -409,6 +483,7 @@
 
 /**
  * Gets the Win32 LCID value for the specified locale.
+ * If the ICU locale is not recognized by Windows, 0 will be returned.
  *
  * @param localeID the locale to get the Win32 LCID value with
  * @return country the Win32 LCID for localeID
@@ -420,34 +495,57 @@
 /**
  * Gets the language name suitable for display for the specified locale.
  *
- * @param locale the locale to get the ISO langauge code with
- * @param inLocale Specifies the locale to be used to display the name.  In other words,
+ * @param locale the locale to get the ISO language code with
+ * @param displayLocale Specifies the locale to be used to display the name.  In other words,
  *                 if the locale's language code is "en", passing Locale::getFrench() for
  *                 inLocale would result in "Anglais", while passing Locale::getGerman()
  *                 for inLocale would result in "Englisch".
- * @param language the displayable langauge code for localeID
+ * @param language the displayable language code for localeID
  * @param languageCapacity the size of the language buffer to store the  
  * displayable language code with
  * @param status error information if retrieving the displayable language code failed
- * @return the actual buffer size needed for the displayable langauge code.  If it's greater 
+ * @return the actual buffer size needed for the displayable language code.  If it's greater 
  * than languageCapacity, the returned language code will be truncated.  
  * @stable ICU 2.0
  */
 U_CAPI int32_t U_EXPORT2
 uloc_getDisplayLanguage(const char* locale,
-            const char* inLocale,
+            const char* displayLocale,
             UChar* language,
             int32_t languageCapacity,
             UErrorCode* status);
 
 /**
+ * Gets the script name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable script code with. NULL may be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name.  In other words,
+ *                 the script name is returned in the language of displayLocale, so the
+ *                 same script is named differently for Locale::getFrench() than for
+ *                 Locale::getGerman(). NULL may be used to specify the default.
+ * @param script the displayable script code for locale
+ * @param scriptCapacity the size of the script buffer to store the  
+ * displayable script code with
+ * @param status error information if retrieving the displayable script code failed
+ * @return the actual buffer size needed for the displayable script code.  If it's greater 
+ * than scriptCapacity, the returned displayable script code will be truncated.  
+ * @draft ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayScript(const char* locale,
+            const char* displayLocale,
+            UChar* script,
+            int32_t scriptCapacity,
+            UErrorCode* status);
+
+/**
  * Gets the country name suitable for display for the specified locale.
  *
- * @param locale the locale to get the displayable country code with
- * @param inLocale Specifies the locale to be used to display the name.  In other words,
+ * @param locale the locale to get the displayable country code with. NULL may be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name.  In other words,
  *                 if the locale's language code is "en", passing Locale::getFrench() for
  *                 inLocale would result in "Anglais", while passing Locale::getGerman()
- *                 for inLocale would result in "Englisch".
+ *                 for inLocale would result in "Englisch". NULL may be used to specify the default.
  * @param country the displayable country code for localeID
  * @param countryCapacity the size of the country buffer to store the  
  * displayable country code with
@@ -458,20 +556,20 @@
  */
 U_CAPI int32_t U_EXPORT2
 uloc_getDisplayCountry(const char* locale,
-            const char* inLocale,
-            UChar* country,
-            int32_t countryCapacity,
-            UErrorCode* status);    /* NULL may be used to specify the default */
+                       const char* displayLocale,
+                       UChar* country,
+                       int32_t countryCapacity,
+                       UErrorCode* status);
 
 
 /**
- * Gets the variant code suitable for display for the specified locale.
+ * Gets the variant name suitable for display for the specified locale.
  *
- * @param locale the locale to get the displayable variant code with
- * @param inLocale Specifies the locale to be used to display the name.  In other words,
+ * @param locale the locale to get the displayable variant code with. NULL may be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name.  In other words,
  *                 if the locale's language code is "en", passing Locale::getFrench() for
  *                 inLocale would result in "Anglais", while passing Locale::getGerman()
- *                 for inLocale would result in "Englisch".
+ *                 for inLocale would result in "Englisch". NULL may be used to specify the default.
  * @param variant the displayable variant code for localeID
  * @param variantCapacity the size of the variant buffer to store the 
  * displayable variant code with
@@ -482,19 +580,92 @@
  */
 U_CAPI int32_t U_EXPORT2
 uloc_getDisplayVariant(const char* locale,
-            const char* inLocale,
-               UChar* variant,
-             int32_t variantCapacity,
-            UErrorCode* status);    /* NULL may be used to specify the default */
+                       const char* displayLocale,
+                       UChar* variant,
+                       int32_t variantCapacity,
+                       UErrorCode* status);
 
 /**
+ * Gets the keyword name suitable for display for the specified locale.
+ * E.g: for the locale string de_DE@collation=PHONEBOOK, this API gets the display 
+ * string for the keyword collation. 
+ * Usage:
+ * <code>
+ *    UErrorCode status = U_ZERO_ERROR;
+ *    const char* keyword =NULL;
+ *    int32_t keywordLen = 0;
+ *    int32_t keywordCount = 0;
+ *    UChar displayKeyword[256];
+ *    int32_t displayKeywordLen = 0;
+ *    UEnumeration* keywordEnum = uloc_openKeywords("de_DE@collation=PHONEBOOK;calendar=TRADITIONAL", &status);
+ *    for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){
+ *          if(U_FAILURE(status)){
+ *              ...something went wrong so handle the error...
+ *              break;
+ *          }
+ *          // the uenum_next returns NUL terminated string
+ *          keyword = uenum_next(keywordEnum, &keywordLen, &status);
+ *          displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256, &status);
+ *          ... do something interesting .....
+ *    }
+ *    uenum_close(keywordEnum);
+ * </code>
+ * @param keyword           The keyword whose display string needs to be returned.
+ * @param displayLocale     Specifies the locale to be used to display the name.  In other words,
+ *                          if the locale's language code is "en", passing Locale::getFrench() for
+ *                          displayLocale would result in "Anglais", while passing Locale::getGerman()
+ *                          for displayLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param dest              the buffer to which the displayable keyword should be written.
+ * @param destCapacity      The size of the buffer (number of UChars). If it is 0, then
+ *                          dest may be NULL and the function will only return the length of the 
+ *                          result without writing any of the result string (pre-flighting).
+ * @param status            error information if retrieving the displayable string failed. 
+ *                          Should not be NULL and should not indicate failure on entry.
+ * @return the actual buffer size needed for the displayable keyword name.
+ * @see #uloc_openKeywords
+ * @draft ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayKeyword(const char* keyword,
+                       const char* displayLocale,
+                       UChar* dest,
+                       int32_t destCapacity,
+                       UErrorCode* status);
+/**
+ * Gets the value of the keyword suitable for display for the specified locale.
+ * E.g: for the locale string de_DE@collation=PHONEBOOK, this API gets the display 
+ * string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword.
+ *
+ * @param locale            The locale to get the displayable keyword value with. NULL may be used to specify the default.
+ * @param keyword           The keyword whose value should be used.
+ * @param displayLocale     Specifies the locale to be used to display the name.  In other words,
+ *                          if the locale's language code is "en", passing Locale::getFrench() for
+ *                          displayLocale would result in "Anglais", while passing Locale::getGerman()
+ *                          for displayLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param dest              the buffer to which the displayable keyword should be written.
+ * @param destCapacity      The size of the buffer (number of UChars). If it is 0, then
+ *                          dest may be NULL and the function will only return the length of the 
+ *                          result without writing any of the result string (pre-flighting).
+ * @param status            error information if retrieving the displayable string failed. 
+ *                          Should not be NULL and must not indicate failure on entry.
+ * @return the actual buffer size needed for the displayable keyword value.
+ * @draft ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayKeywordValue(   const char* locale,
+                               const char* keyword,
+                               const char* displayLocale,
+                               UChar* dest,
+                               int32_t destCapacity,
+                               UErrorCode* status);
+/**
  * Gets the full name suitable for display for the specified locale.
  *
- * @param localeID the locale to get the displayable name with
+ * @param localeID the locale to get the displayable name with. NULL may be used to specify the default.
  * @param inLocaleID Specifies the locale to be used to display the name.  In other words,
  *                   if the locale's language code is "en", passing Locale::getFrench() for
  *                   inLocale would result in "Anglais", while passing Locale::getGerman()
- *                   for inLocale would result in "Englisch".
+ *                   for inLocale would result in "Englisch". NULL may be used to specify the default.
  * @param result the displayable name for localeID
  * @param maxResultSize the size of the name buffer to store the 
  * displayable full name with
@@ -505,7 +676,7 @@
  */
 U_CAPI int32_t U_EXPORT2
 uloc_getDisplayName(const char* localeID,
-            const char* inLocaleID, /* NULL may be used to specify the default */
+            const char* inLocaleID,
             UChar* result,
             int32_t maxResultSize,
             UErrorCode* err);
@@ -574,6 +745,79 @@
                  char* parent,
                  int32_t parentCapacity,
                  UErrorCode* err);
+
+
+/**
+ * Character separating keywords from the locale string
+ * different for EBCDIC - TODO
+ * @draft ICU 2.8
+ */
+#define ULOC_KEYWORD_SEPARATOR '@'
+/**
+ * Character for assigning value to a keyword
+ * @draft ICU 2.8
+ */
+#define ULOC_KEYWORD_ASSIGN '='
+/**
+ * Character separating keywords
+ * @draft ICU 2.8
+ */
+#define ULOC_KEYWORD_ITEM_SEPARATOR ';'
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the string to
+ * a certain extent. Upper and lower case are set as needed,
+ * and if the components were in 'POSIX' format they are changed to
+ * ICU format.  It does NOT map aliased names in any way.
+ * See the top of this header file.
+ * This API strips off the keyword part, so "de_DE@collation=phonebook" 
+ * will become "de_DE". 
+ * This API supports preflighting.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name fill in buffer for the name without keywords.
+ * @param nameCapacity capacity of the fill in buffer.
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name.  If it's greater 
+ * than nameCapacity, the returned full name will be truncated.  
+ * @draft ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getBaseName(const char*    localeID,
+         char* name,
+         int32_t nameCapacity,
+         UErrorCode* err);
+
+/**
+ * Gets an enumeration of keywords for the specified locale. Enumeration
+ * must get disposed of by the client using uenum_close function.
+ *
+ * @param localeID the locale to get the keywords with
+ * @param status error information if retrieving the keywords failed
+ * @return enumeration of keywords or NULL if there are no keywords.
+ * @draft ICU 2.8
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uloc_openKeywords(const char* localeID,
+                        UErrorCode* status);
+
+/**
+ * Get the value for a keyword. Locale name does not need to be normalized.
+ * 
+ * @param localeID locale name containing the keyword ("de_DE@currency=EURO;collation=PHONEBOOK")
+ * @param keywordName name of the keyword for which we want the value. Case insensitive.
+ * @param buffer receiving buffer
+ * @param bufferCapacity capacity of receiving buffer
+ * @param status containing error code - buffer not big enough.
+ * @return the length of keyword value
+ * @draft ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getKeywordValue(const char* localeID,
+                     const char* keywordName,
+                     char* buffer, int32_t bufferCapacity,
+                     UErrorCode* status);
 
 /*eof*/
 

Index: umachine.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/umachine.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- umachine.h	10 Sep 2003 02:42:04 -0000	1.4
+++ umachine.h	6 Apr 2004 10:08:08 -0000	1.5
@@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 1999-2002, International Business Machines
+*   Copyright (C) 1999-2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@@ -188,37 +188,36 @@
 #endif
 
 #if defined(U_INT64_T_UNAVAILABLE)
-#   ifndef INTMAX_MIN
-#       define INTMAX_MIN      INT32_MIN
-#   endif
-#   ifndef INTMAX_MAX
-#       define INTMAX_MAX      INT32_MAX
-#   endif
-#   ifndef UINTMAX_MAX
-#       define UINTMAX_MAX     UINT32_MAX
-#   endif
+# error int64_t is required for decimal format and rule-based number format.
 #else
-#   ifndef INT64_MIN
-/** The smallest value a 64 bit signed integer can hold @stable ICU 2.0 */
-#       define INT64_MIN       ((int64_t)(-9223372036854775807-1))
-#   endif
-#   ifndef INT64_MAX
-/** The largest value a 64 bit signed integer can hold @stable ICU 2.0 */
-#       define INT64_MAX       ((int64_t)(9223372036854775807))
-#   endif
-#   ifndef UINT64_MAX
-/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.0 */
-#       define UINT64_MAX      ((uint64_t)(18446744073709551615))
-#   endif
-#   ifndef INTMAX_MIN
-#       define INTMAX_MIN      INT64_MIN
-#   endif
-#   ifndef INTMAX_MAX
-#       define INTMAX_MAX      INT64_MAX
-#   endif
-#   ifndef UINTMAX_MAX
-#       define UINTMAX_MAX     UINT64_MAX
-#   endif
+# ifndef INT64_C
+/**
+ * Provides a platform independent way to specify a signed 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
+ * @draft ICU 2.8
+ */
+#   define INT64_C(c) c ## LL
+# endif
+# ifndef UINT64_C
+/**
+ * Provides a platform independent way to specify an unsigned 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
+ * @draft ICU 2.8
+ */
+#   define UINT64_C(c) c ## ULL
+# endif
+# ifndef U_INT64_MIN
+/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
+#     define U_INT64_MIN       ((int64_t)(INT64_C(-9223372036854775807)-1))
+# endif
+# ifndef U_INT64_MAX
+/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
+#     define U_INT64_MAX       ((int64_t)(INT64_C(9223372036854775807)))
+# endif
+# ifndef U_UINT64_MAX
+/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
+#     define U_UINT64_MAX      ((uint64_t)(UINT64_C(18446744073709551615)))
+# endif
 #endif
 
 /*==========================================================================*/

Index: umisc.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/umisc.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- umisc.h	10 Sep 2003 02:42:04 -0000	1.3
+++ umisc.h	6 Apr 2004 10:08:08 -0000	1.4
@@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 1999-2001, International Business Machines
+*   Copyright (C) 1999-2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   file name:  umisc.h

Index: unifilt.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/unifilt.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- unifilt.h	10 Sep 2003 02:42:04 -0000	1.1
+++ unifilt.h	6 Apr 2004 10:08:08 -0000	1.2
@@ -1,5 +1,6 @@
 /*
-* Copyright (C) 1999, International Business Machines Corporation and others. All Rights Reserved.
+* Copyright (C) 1999-2003, International Business Machines Corporation and others.
+* All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
 *   11/17/99    aliu        Creation.
@@ -85,40 +86,41 @@
      * UnicodeFunctor API.  Nothing to do.
      * @draft ICU 2.4
      */
-    virtual void setData(const TransliterationRuleData*) {}
+    virtual void setData(const TransliterationRuleData*);
 
     /**
      * ICU "poor man's RTTI", returns a UClassID for the actual class.
      *
-     * @draft ICU 2.2
+     * @stable ICU 2.2
      */
-    virtual inline UClassID getDynamicClassID() const = 0;
+    virtual UClassID getDynamicClassID() const = 0;
 
-    /**
+    /*
      * ICU "poor man's RTTI", returns a UClassID for this class.
      *
-     * @draft ICU 2.2
+     * @stable ICU 2.2
      */
-    static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
+/*    static UClassID getStaticClassID() { return (UClassID)&fgClassID; }*/
 
 protected:
 
-    /**
+    /*
+     * Since this class has pure virtual functions,
+     * a constructor can't be used.
      * @stable ICU 2.0
      */
-    UnicodeFilter();
+/*    UnicodeFilter();*/
 
 private:
 
-    /**
+    /*
      * The address of this static class variable serves as this class's ID
      * for ICU "poor man's RTTI".
      */
-    static const char fgClassID;
+/*    static const char fgClassID;*/
 };
 
-inline UnicodeFilter::UnicodeFilter() {}
-inline UnicodeFilter::~UnicodeFilter() {}
+/*inline UnicodeFilter::UnicodeFilter() {}*/
 
 U_NAMESPACE_END
 

Index: unifunct.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/unifunct.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- unifunct.h	10 Sep 2003 02:42:04 -0000	1.1
+++ unifunct.h	6 Apr 2004 10:08:08 -0000	1.2
@@ -27,7 +27,7 @@
  */
 class U_COMMON_API UnicodeFunctor : public UObject {
 
- public:
+public:
 
     /**
      * Destructor
@@ -67,7 +67,7 @@
      */
     virtual UnicodeReplacer* toReplacer() const;
 
-    /**
+    /*
      * Return the class ID for this class.  This is useful only for
      * comparing to a return value from getDynamicClassID().  For example:
      * <pre>
@@ -78,7 +78,7 @@
      * @return          The class ID for all objects of this class.
      * @stable ICU 2.0
      */
-    static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }
+    /*static UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }*/
 
     /**
      * Returns a unique class ID <b>polymorphically</b>.  This method
@@ -115,24 +115,25 @@
      */
     virtual void setData(const TransliterationRuleData*) = 0;
 
- protected:
+protected:
 
     /**
+     * Since this class has pure virtual functions,
+     * a constructor can't be used.
      * @stable ICU 2.0
      */
-    UnicodeFunctor();
+    /*UnicodeFunctor();*/
 
- private:
+private:
 
-    /**
+    /*
      * Class identifier for subclasses of UnicodeFunctor that do not
      * define their class (anonymous subclasses).
      */
-    static const char fgClassID;
+    /*static const char fgClassID;*/
 };
 
-inline UnicodeFunctor::UnicodeFunctor() {}
-inline UnicodeFunctor::~UnicodeFunctor() {}
+/*inline UnicodeFunctor::UnicodeFunctor() {}*/
 
 U_NAMESPACE_END
 

Index: unimatch.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/unimatch.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- unimatch.h	10 Sep 2003 02:42:04 -0000	1.1
+++ unimatch.h	6 Apr 2004 10:08:08 -0000	1.2
@@ -1,5 +1,5 @@
 /*
-* Copyright (C) 2001, International Business Machines Corporation and others. All Rights Reserved.
+* Copyright (C) 2001-2003, International Business Machines Corporation and others. All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
 *   07/18/01    aliu        Creation.
@@ -66,7 +66,7 @@
      * Destructor.
      * @draft ICU 2.4
      */
-    virtual inline ~UnicodeMatcher() {};
+    virtual ~UnicodeMatcher();
 
     /**
      * Return a UMatchDegree value indicating the degree of match for

Index: uniset.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uniset.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- uniset.h	10 Sep 2003 02:42:04 -0000	1.1
+++ uniset.h	6 Apr 2004 10:08:08 -0000	1.2
@@ -22,7 +22,7 @@
 class SymbolTable;
 class UVector;
 class CaseEquivClass;
-
+class RuleCharacterIterator;
     
 /**
  * A mutable set of Unicode characters and multicharacter strings.  Objects of this class
@@ -282,13 +282,21 @@
      * Minimum value that can be stored in a UnicodeSet.
      * @draft ICU 2.4
      */
+#ifdef U_CYGWIN
+    static U_COMMON_API const UChar32 MIN_VALUE;
+#else
     static const UChar32 MIN_VALUE;
+#endif
 
     /**
      * Maximum value that can be stored in a UnicodeSet.
      * @draft ICU 2.4
      */
+#ifdef U_CYGWIN
+    static U_COMMON_API const UChar32 MAX_VALUE;
+#else
     static const UChar32 MAX_VALUE;
+#endif
 
     //----------------------------------------------------------------
     // Constructors &c
@@ -329,12 +337,33 @@
      * @param pattern a string specifying what characters are in the set
      * @param options bitmask for options to apply to the pattern.
      * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+     * @param symbols a symbol table mapping variable names to values
+     * and stand-in characters to UnicodeSets; may be NULL
      * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
      * contains a syntax error.
      * @internal
      */
     UnicodeSet(const UnicodeString& pattern,
                uint32_t options,
+               const SymbolTable* symbols,
+               UErrorCode& status);
+
+    /**
+     * Constructs a set from the given pattern.  See the class description
+     * for the syntax of the pattern language.
+     * @param pattern a string specifying what characters are in the set
+     * @param pos on input, the position in pattern at which to start parsing.
+     * On output, the position after the last character parsed.
+     * @param options bitmask for options to apply to the pattern.
+     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+     * @param symbols a symbol table mapping variable names to values
+     * and stand-in characters to UnicodeSets; may be NULL
+     * @param status input-output error code
+     * @draft ICU 2.8
+     */
+    UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
+               uint32_t options,
+               const SymbolTable* symbols,
                UErrorCode& status);
 
 #ifdef U_USE_UNICODESET_DEPRECATES
@@ -432,6 +461,8 @@
      * @param pattern a string specifying what characters are in the set
      * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
      * contains a syntax error.
+     * <em> Empties the set passed before applying the pattern.</em>
+     * @return a reference to this
      * @stable ICU 2.0
      */
     virtual UnicodeSet& applyPattern(const UnicodeString& pattern,
@@ -444,12 +475,53 @@
      * @param pattern a string specifying what characters are in the set
      * @param options bitmask for options to apply to the pattern.
      * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+     * @param symbols a symbol table mapping variable names to
+     * values and stand-ins to UnicodeSets; may be NULL
      * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
      * contains a syntax error.
+     *<em> Empties the set passed before applying the pattern.</em>
+     * @return a reference to this
      * @internal
      */
     UnicodeSet& applyPattern(const UnicodeString& pattern,
                              uint32_t options,
+                             const SymbolTable* symbols,
+                             UErrorCode& status);
+
+    /**
+     * Parses the given pattern, starting at the given position.  The
+     * character at pattern.charAt(pos.getIndex()) must be '[', or the
+     * parse fails.  Parsing continues until the corresponding closing
+     * ']'.  If a syntax error is encountered between the opening and
+     * closing brace, the parse fails.  Upon return from a successful
+     * parse, the ParsePosition is updated to point to the character
+     * following the closing ']', and a reference to this set is
+     * returned.  This method calls itself recursively to parse
+     * embedded subpatterns.
+     *<em> Empties the set passed before applying the pattern.</em>
+     *
+     * @param pattern the string containing the pattern to be parsed.
+     * The portion of the string from pos.getIndex(), which must be a
+     * '[', to the corresponding closing ']', is parsed.
+     * @param pos upon entry, the position at which to begin parsing.
+     * The character at pattern.charAt(pos.getIndex()) must be a '['.
+     * Upon return from a successful parse, pos.getIndex() is either
+     * the character after the closing ']' of the parsed pattern, or
+     * pattern.length() if the closing ']' is the last character of
+     * the pattern string.
+     * @param options bitmask for options to apply to the pattern.
+     * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+     * @param symbols a symbol table mapping variable names to
+     * values and stand-ins to UnicodeSets; may be NULL
+     * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+     * contains a syntax error.
+     * @return a reference to this
+     * @draft ICU 2.8
+     */
+    UnicodeSet& applyPattern(const UnicodeString& pattern,
+                             ParsePosition& pos,
+                             uint32_t options,
+                             const SymbolTable* symbols,
                              UErrorCode& status);
 
     /**
@@ -1113,49 +1185,11 @@
 
     const UnicodeString* getString(int32_t index) const;
 
-private:
-
-    static const char fgClassID;
-
     //----------------------------------------------------------------
     // RuleBasedTransliterator support
     //----------------------------------------------------------------
 
-    friend class TransliteratorParser;
-    friend class TransliteratorIDParser;
-
-    friend class RBBIRuleScanner;
-    friend class RegexCompile;
-
-    /**
-     * Constructs a set from the given pattern.  See the class description
-     * for the syntax of the pattern language.
-
-     * @param pattern a string specifying what characters are in the set
-     * @param pos on input, the position in pattern at which to start parsing.
-     * On output, the position after the last character parsed.
-     * @param varNameToChar a mapping from variable names (String) to characters
-     * (Character).  May be null.  If varCharToSet is non-null, then names may
-     * map to either single characters or sets, depending on whether a mapping
-     * exists in varCharToSet.  If varCharToSet is null then all names map to
-     * single characters.
-     * @param varCharToSet a mapping from characters (Character objects from
-     * varNameToChar) to UnicodeSet objects.  May be null.  Is only used if
-     * varNameToChar is also non-null.
-     * @exception <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
-     * contains a syntax error.
-     */
-    UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
-               const SymbolTable& symbols,
-               UErrorCode& status);
-
-    /**
-     * Constructs a set from the given pattern.  Identical to the
-     * 4-parameter ParsePosition contstructor, but does not take a
-     * SymbolTable, and does not recognize embedded variables.
-     */
-    UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
-               uint32_t options, UErrorCode& status);
+private:
 
     /**
      * Returns <tt>true</tt> if this set contains any character whose low byte
@@ -1170,35 +1204,11 @@
     // Implementation: Pattern parsing
     //----------------------------------------------------------------
 
-    /**
-     * Parses the given pattern, starting at the given position.  The
-     * character at pattern.charAt(pos.getIndex()) must be '[', or the
-     * parse fails.  Parsing continues until the corresponding closing
-     * ']'.  If a syntax error is encountered between the opening and
-     * closing brace, the parse fails.  Upon return from a successful
-     * parse, the ParsePosition is updated to point to the character
-     * following the closing ']', and a StringBuffer containing a
-     * pairs list for the parsed pattern is returned.  This method calls
-     * itself recursively to parse embedded subpatterns.
-     *
-     * @param pattern the string containing the pattern to be parsed.
-     * The portion of the string from pos.getIndex(), which must be a
-     * '[', to the corresponding closing ']', is parsed.
-     * @param pos upon entry, the position at which to being parsing.
-     * The character at pattern.charAt(pos.getIndex()) must be a '['.
-     * Upon return from a successful parse, pos.getIndex() is either
-     * the character after the closing ']' of the parsed pattern, or
-     * pattern.length() if the closing ']' is the last character of
-     * the pattern string.
-     * @return a StringBuffer containing a pairs list for the parsed
-     * substring of <code>pattern</code>
-     * @exception U_ILLEGAL_ARGUMENT_ERROR if the parse fails.
-     */
-    void applyPattern(const UnicodeString& pattern,
-                      ParsePosition& pos,
-                      uint32_t options,
+    void applyPattern(RuleCharacterIterator& chars,
                       const SymbolTable* symbols,
-                      UErrorCode& status);
+                      UnicodeString& rebuiltPat,
+                      uint32_t options,
+                      UErrorCode& ec);
 
     //----------------------------------------------------------------
     // Implementation: Utility methods
@@ -1212,13 +1222,6 @@
 
     UBool allocateStrings();
 
-    void _applyPattern(const UnicodeString& pattern,
-                       ParsePosition& pos,
-                       uint32_t options,
-                       const SymbolTable* symbols,
-                       UnicodeString& rebuiltPat,
-                       UErrorCode& status);
-
     UnicodeString& _toPattern(UnicodeString& result,
                               UBool escapeUnprintable) const;
 
@@ -1247,6 +1250,9 @@
     static UBool resemblesPropertyPattern(const UnicodeString& pattern,
                                           int32_t pos);
 
+    static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
+                                          int32_t iterOpts);
+
     /**
      * Parse the given property pattern at the given parse position
      * and set this UnicodeSet to the result.
@@ -1289,6 +1295,10 @@
                                      ParsePosition& ppos,
                                      UErrorCode &ec);
 
+    void applyPropertyPattern(RuleCharacterIterator& chars,
+                              UnicodeString& rebuiltPat,
+                              UErrorCode& ec);
+
     /**
      * A filter that returns TRUE if the given code point should be
      * included in the UnicodeSet being constructed.
@@ -1329,14 +1339,6 @@
 
     static const CaseEquivClass* getCaseMapOf(UChar folded);
 };
-
-inline UClassID
-UnicodeSet::getStaticClassID(void)
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-UnicodeSet::getDynamicClassID(void) const
-{ return UnicodeSet::getStaticClassID(); }
 
 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
     return !operator==(o);

Index: unistr.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/unistr.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- unistr.h	10 Sep 2003 02:42:04 -0000	1.5
+++ unistr.h	6 Apr 2004 10:08:08 -0000	1.6
@@ -21,8 +21,6 @@
 #ifndef UNISTR_H
 #define UNISTR_H
 
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
 #include "unicode/rep.h"
 
 struct UConverter;          // unicode/ucnv.h
@@ -33,7 +31,7 @@
 /**
  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
  * Compare strings in code point order instead of code unit order.
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 #define U_COMPARE_CODE_POINT_ORDER  0x8000
 #endif
@@ -47,7 +45,6 @@
 U_NAMESPACE_BEGIN
 
 class Locale;               // unicode/locid.h
-class UCharReference;
 class StringCharacterIterator;
 class BreakIterator;        // unicode/brkiter.h
 
@@ -1470,8 +1467,10 @@
 
   /**
    * Return the length of the UnicodeString object.  
-   * The length is the number of characters in the text.
+   * The length is the number of UChar code units in the UnicodeString.
+   * If you want the number of code points, please use countChar32().
    * @return the length of the UnicodeString object
+   * @see countChar32
    * @stable ICU 2.0
    */
   inline int32_t length(void) const;
@@ -1486,6 +1485,7 @@
    * @param start the index of the first code unit to check
    * @param length the number of UChar code units to check
    * @return the number of code points in the specified code units
+   * @see length
    * @stable ICU 2.0
    */
   int32_t
@@ -1622,7 +1622,7 @@
    * @param srcStart the offset into <TT>srcText</TT> where new characters
    * will be obtained
    * @return a reference to this
-   * @draft ICU2.2
+   * @stable ICU 2.2
    */
   inline UnicodeString& setTo(const UnicodeString& srcText, 
                int32_t srcStart);
@@ -2366,6 +2366,7 @@
    *                  that are to be titlecased.
    *                  If none is provided (0), then a standard titlecase
    *                  break iterator is opened.
+   *                  Otherwise the provided iterator is set to the string's text.
    * @return A reference to this.
    * @stable ICU 2.1
    */
@@ -2393,6 +2394,7 @@
    *                  that are to be titlecased.
    *                  If none is provided (0), then a standard titlecase
    *                  break iterator is opened.
+   *                  Otherwise the provided iterator is set to the string's text.
    * @param locale    The locale to consider.
    * @return A reference to this.
    * @stable ICU 2.1
@@ -2548,7 +2550,7 @@
    *
    * @see getBuffer(int32_t minCapacity)
    * @see getBuffer()
-   * @draft ICU 2.2
+   * @stable ICU 2.2
    */
   inline const UChar *getTerminatedBuffer();
 
@@ -2717,7 +2719,7 @@
    * 'Substring' constructor from tail of source string.
    * @param src The UnicodeString object to copy.
    * @param srcStart The offset into <tt>src</tt> at which to start copying.
-   * @draft ICU2.2
+   * @stable ICU 2.2
    */
   UnicodeString(const UnicodeString& src, int32_t srcStart);
 
@@ -2726,7 +2728,7 @@
    * @param src The UnicodeString object to copy.
    * @param srcStart The offset into <tt>src</tt> at which to start copying.
    * @param srcLength The number of characters from <tt>src</tt> to copy.
-   * @draft ICU2.2
+   * @stable ICU 2.2
    */
   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
 
@@ -2814,18 +2816,18 @@
   UChar32 unescapeAt(int32_t &offset) const;
 
   /**
-   * ICU "poor man's RTTI", returns a UClassID for the actual class.
+   * ICU "poor man's RTTI", returns a UClassID for this class.
    *
-   * @draft ICU 2.2
+   * @stable ICU 2.2
    */
-  virtual inline UClassID getDynamicClassID() const;
+  static UClassID getStaticClassID();
 
   /**
-   * ICU "poor man's RTTI", returns a UClassID for this class.
+   * ICU "poor man's RTTI", returns a UClassID for the actual class.
    *
-   * @draft ICU 2.2
+   * @stable ICU 2.2
    */
-  static inline UClassID getStaticClassID();
+  virtual UClassID getDynamicClassID() const;
 
   //========================================
   // Implementation methods
@@ -3067,15 +3069,23 @@
   uint16_t  fFlags;         // bit flags: see constants above
   UChar     fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings
 
-  /**
-   * The address of this static class variable serves as this class's ID
-   * for ICU "poor man's RTTI".
-   */
-  static const char fgClassID;
 };
 
+/**
+ * Create a new UnicodeString with the concatenation of two others.
+ *
+ * @param s1 The first string to be copied to the new one.
+ * @param s2 The second string to be copied to the new one, after s1.
+ * @return UnicodeString(s1).append(s2)
+ * @draft ICU 2.8
+ */
+U_COMMON_API UnicodeString
+operator+ (const UnicodeString &s1, const UnicodeString &s2);
+
 U_NAMESPACE_END
 
+// inline implementations -------------------------------------------------- ***
+
 //========================================
 // Array copying
 //========================================
@@ -3145,14 +3155,6 @@
 //========================================
 // Read-only implementation methods
 //========================================
-inline UClassID
-UnicodeString::getStaticClassID()
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-UnicodeString::getDynamicClassID() const
-{ return UnicodeString::getStaticClassID(); }
-
 inline int32_t  
 UnicodeString::length() const
 { return fLength; }
@@ -4015,7 +4017,6 @@
 UnicodeString::reverse(int32_t start,
                int32_t _length)
 { return doReverse(start, _length); }
-
 
 U_NAMESPACE_END
 

Index: unorm.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/unorm.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- unorm.h	10 Sep 2003 02:42:04 -0000	1.4
+++ unorm.h	6 Apr 2004 10:08:08 -0000	1.5
@@ -271,7 +271,7 @@
  *         "mode" normalization form.
  *
  * @see unorm_quickCheck
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI UBool U_EXPORT2
 unorm_isNormalized(const UChar *src, int32_t srcLength,
@@ -460,14 +460,14 @@
 /**
  * Option bit for unorm_compare:
  * Both input strings are assumed to fulfill FCD conditions.
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 #define UNORM_INPUT_IS_FCD          0x20000
 
 /**
  * Option bit for unorm_compare:
  * Perform case-insensitive comparison.
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 #define U_COMPARE_IGNORE_CASE       0x10000
 
@@ -476,7 +476,7 @@
 /**
  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
  * Compare strings in code point order instead of code unit order.
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 #define U_COMPARE_CODE_POINT_ORDER  0x8000
 #endif
@@ -562,7 +562,7 @@
  * @see u_strCompare
  * @see u_strCaseCompare
  *
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI int32_t U_EXPORT2
 unorm_compare(const UChar *s1, int32_t length1,

Index: uobject.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uobject.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- uobject.h	10 Sep 2003 02:42:04 -0000	1.1
+++ uobject.h	6 Apr 2004 10:08:09 -0000	1.2
@@ -34,7 +34,7 @@
  *         applications that statically link the C Runtime library, meaning that
  *         the app and ICU will be using different heaps.
  *
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */                              
 #ifndef U_OVERRIDE_CXX_ALLOCATION
 #define U_OVERRIDE_CXX_ALLOCATION 1
@@ -151,23 +151,23 @@
  * This is because some compilers do not support covariant (same-as-this)
  * return types; cast to the appropriate subclass if necessary.
  *
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 class U_COMMON_API UObject : public UMemory {
 public:
     /**
      * Destructor.
      *
-     * @draft ICU 2.2
+     * @stable ICU 2.2
      */
-    virtual inline ~UObject() {}
+    virtual ~UObject();
 
     /**
      * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
      *
-     * @draft ICU 2.2
+     * @stable ICU 2.2
      */
-    virtual inline UClassID getDynamicClassID() const = 0;
+    virtual UClassID getDynamicClassID() const = 0;
 
 protected:
     // the following functions are protected to prevent instantiation and
@@ -181,7 +181,7 @@
     // commented out because UObject is abstract (see getDynamicClassID)
     // inline UObject(const UObject &other) {}
 
-#if U_ICU_VERSION_MAJOR_NUM>2 || (U_ICU_VERSION_MAJOR_NUM==2 && U_ICU_VERSION_MINOR_NUM>6)
+#if U_ICU_VERSION_MAJOR_NUM>2
     // TODO post ICU 2.4  (This comment inserted in 2.2)
     // some or all of the following "boilerplate" functions may be made public
     // in a future ICU4C release when all subclasses implement them
@@ -213,6 +213,23 @@
     UObject &UObject::operator=(const UObject &);
      */
 };
+
+/**
+ * This is a simple macro to add ICU RTTI to an ICU object implementation.
+ * This does not go into the header. This should only be used in *.cpp files.
+ *
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \
+    UClassID myClass::getStaticClassID() { \
+        static const char classID = 0; \
+        return (UClassID)&classID; \
+    } \
+    UClassID myClass::getDynamicClassID() const \
+    { return myClass::getStaticClassID(); }
+
+
 
 U_NAMESPACE_END
 

Index: urename.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/urename.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- urename.h	10 Sep 2003 02:42:04 -0000	1.5
+++ urename.h	6 Apr 2004 10:08:09 -0000	1.6
@@ -29,1358 +29,1377 @@
 
 /* C exports renaming data */
 
-#define T_CString_integerToString T_CString_integerToString_2_6
-#define T_CString_stricmp T_CString_stricmp_2_6
-#define T_CString_stringToInteger T_CString_stringToInteger_2_6
-#define T_CString_strnicmp T_CString_strnicmp_2_6
-#define T_CString_toLowerCase T_CString_toLowerCase_2_6
-#define T_CString_toUpperCase T_CString_toUpperCase_2_6
-#define T_FileStream_close T_FileStream_close_2_6
-#define T_FileStream_eof T_FileStream_eof_2_6
[...2693 lines suppressed...]
+#define UnicodeFilter UnicodeFilter_2_8
+#define UnicodeFunctor UnicodeFunctor_2_8
+#define UnicodeMatcher UnicodeMatcher_2_8
+#define UnicodeNameTransliterator UnicodeNameTransliterator_2_8
+#define UnicodeReplacer UnicodeReplacer_2_8
+#define UnicodeSet UnicodeSet_2_8
+#define UnicodeSetIterator UnicodeSetIterator_2_8
+#define UnicodeString UnicodeString_2_8
+#define UnicodeToHexTransliterator UnicodeToHexTransliterator_2_8
+#define UppercaseTransliterator UppercaseTransliterator_2_8
+#define ValueRecord ValueRecord_2_8
+#define ValueRuns ValueRuns_2_8
+#define locale_set_default_internal locale_set_default_internal_2_8
+#define util64_fromDouble util64_fromDouble_2_8
+#define util64_pow util64_pow_2_8
+#define util64_tou util64_tou_2_8
+#define util64_utoi util64_utoi_2_8
 
 #endif
 #endif

Index: ures.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/ures.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- ures.h	10 Sep 2003 02:42:05 -0000	1.5
+++ ures.h	6 Apr 2004 10:08:09 -0000	1.6
@@ -83,6 +83,14 @@
     URES_ALIAS=3,
 
     /**
+     * Internal use only.
+     * Alternative resource type constant for tables of key-value pairs.
+     * Never returned by ures_getType().
+     * @internal
+     */
+    URES_TABLE32=4,
+
+    /**
      * Resource type constant for a single 28-bit integer, interpreted as
      * signed or unsigned by the ures_getInt() or ures_getUInt() function.
      * @see ures_getInt
@@ -214,7 +222,7 @@
  *                <STRONG>Tables</STRONG>: returns the number of resources in the table
  *                <STRONG>single string</STRONG>: returns 1
  *@see ures_getSize
- * @stable ICU 2.0
+ * @deprecated ICU 2.8 Use ures_getSize instead.
  */
 U_CAPI int32_t U_EXPORT2 
 ures_countArrayItems(const UResourceBundle* resourceBundle,
@@ -239,7 +247,7 @@
  * @return  A version number string as specified in the resource bundle or its parent.
  *          The caller does not own this string.
  * @see ures_getVersion
- * @stable ICU 2.0
+ * @deprecated ICU 2.8 Use ures_getVersion instead.
  */
 U_CAPI const char* U_EXPORT2 
 ures_getVersionNumber(const UResourceBundle*   resourceBundle);
@@ -267,12 +275,31 @@
  * @param resourceBundle resource bundle in question
  * @param status just for catching illegal arguments
  * @return  A Locale name
- * @stable ICU 2.0
+ * @deprecated ICU 2.8 Use ures_getLocaleByType instead.
  */
 U_CAPI const char* U_EXPORT2 
 ures_getLocale(const UResourceBundle* resourceBundle, 
                UErrorCode* status);
 
+
+/**
+ * Return the name of the Locale associated with this ResourceBundle. 
+ * You can choose between requested, valid and real locale.
+ *
+ * @param resourceBundle resource bundle in question
+ * @param type You can choose between requested, valid and actual
+ *             locale. For description see the definition of
+ *             ULocDataLocaleType in uloc.h
+ * @param status just for catching illegal arguments
+ * @return  A Locale name
+ * @draft ICU 2.8
+ */
+U_CAPI const char* U_EXPORT2 
+ures_getLocaleByType(const UResourceBundle* resourceBundle, 
+                     ULocDataLocaleType type, 
+                     UErrorCode* status);
+
+
 /**
  * Same as ures_open() but uses the fill-in parameter instead of allocating
  * a bundle, if r!=NULL.
@@ -401,10 +428,9 @@
 /**
  * Returns the size of a resource. Size for scalar types is always 1, 
  * and for vector/table types is the number of child resources.
- * @warning Currently, this function works correctly for string, table and 
- *          array resources. For other types of resources, the result is
- *          undefined. This is a bug and will be fixed.
- *
+ * @warning Integer array is treated as a scalar type. There are no 
+ *          APIs to access individual members of an integer array. It
+ *          is always returned as a whole.
  * @param resourceBundle a resource
  * @return number of resources in a given resource.
  * @stable ICU 2.0

Index: uscript.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uscript.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- uscript.h	10 Sep 2003 02:42:05 -0000	1.5
+++ uscript.h	6 Apr 2004 10:08:09 -0000	1.6
@@ -19,7 +19,7 @@
 /**
  * Constants for Unicode script values from ScriptNames.txt .
  *
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 typedef enum UScriptCode {
       USCRIPT_INVALID_CODE = -1,
@@ -65,7 +65,7 @@
       USCRIPT_TIBETAN      = 39,  /* Tibt */
       /** Canadian_Aboriginal script. @draft ICU 2.6 */
       USCRIPT_CANADIAN_ABORIGINAL = 40,  /* Cans */
-      /** Canadian_Aboriginal script (alias). @draft ICU 2.2 */
+      /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
       USCRIPT_UCAS         = USCRIPT_CANADIAN_ABORIGINAL,
       USCRIPT_YI           = 41,  /* Yiii */
       USCRIPT_TAGALOG      = 42,  /* Tglg */

Index: uset.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uset.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- uset.h	10 Sep 2003 02:42:05 -0000	1.1
+++ uset.h	6 Apr 2004 10:08:09 -0000	1.2
@@ -40,14 +40,14 @@
 #endif
 
 /**
- * Bitmask values to be passed to the UnicodeSet constructor or
- * applyPattern() taking an option parameter.
- * @draft
+ * Bitmask values to be passed to uset_openPatternOptions() or
+ * uset_applyPattern() taking an option parameter.
+ * @draft ICU 2.4
  */
 enum {
     /**
      * Ignore white space within patterns unless quoted or escaped.
-     * @draft
+     * @draft ICU 2.4
      */
     USET_IGNORE_SPACE = 1,  
 
@@ -55,7 +55,7 @@
      * Enable case insensitive matching.  E.g., "[ab]" with this flag
      * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
      * match all except 'a', 'A', 'b', and 'B'.
-     * @draft
+     * @draft ICU 2.4
      */
     USET_CASE_INSENSITIVE = 2,  
 
@@ -155,6 +155,32 @@
 uset_close(USet* set);
 
 /**
+ * Modifies the set to represent the set specified by the given
+ * pattern. See the UnicodeSet class description for the syntax of 
+ * the pattern language. See also the User Guide chapter about UnicodeSet.
+ * <em>Empties the set passed before applying the pattern.</em>
+ * @param set               The set to which the pattern is to be applied. 
+ * @param pattern           A pointer to UChar string specifying what characters are in the set.
+ *                          The character at pattern[0] must be a '['.
+ * @param patternLength     The length of the UChar string. -1 if NUL terminated.
+ * @param options           A bitmask for options to apply to the pattern.
+ *                          Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
+ * @param status            Returns an error if the pattern cannot be parsed.
+ * @return                  Upon successful parse, the value is
+ *                          the index of the character after the closing ']' 
+ *                          of the parsed pattern.
+ *                          If the status code indicates failure, then the return value 
+ *                          is the index of the error in the source.
+ *                                  
+ * @draft ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2 
+uset_applyPattern(USet *set,
+                  const UChar *pattern, int32_t patternLength,
+                  uint32_t options,
+                  UErrorCode *status);
+
+/**
  * Returns a string representation of this set.  If the result of
  * calling this function is passed to a uset_openPattern(), it
  * will produce another set that is equal to this one.
@@ -205,7 +231,7 @@
  * @param set the object to which to add the character
  * @param start the first character of the range to add, inclusive
  * @param end the last character of the range to add, inclusive
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI void U_EXPORT2
 uset_addRange(USet* set, UChar32 start, UChar32 end);
@@ -237,7 +263,7 @@
  * @param set the object to which to add the character
  * @param start the first character of the range to remove, inclusive
  * @param end the last character of the range to remove, inclusive
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI void U_EXPORT2
 uset_removeRange(USet* set, UChar32 start, UChar32 end);
@@ -299,7 +325,7 @@
  * @param start the first character of the range to test, inclusive
  * @param end the last character of the range to test, inclusive
  * @return TRUE if set contains the range
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI UBool U_EXPORT2
 uset_containsRange(const USet* set, UChar32 start, UChar32 end);

Index: usetiter.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/usetiter.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- usetiter.h	10 Sep 2003 02:42:05 -0000	1.1
+++ usetiter.h	6 Apr 2004 10:08:09 -0000	1.2
@@ -48,7 +48,7 @@
  * }
  * </pre>
  * @author M. Davis
- * @draft ICU 2.2
+ * @draft ICU 2.4
  */
 class U_COMMON_API UnicodeSetIterator : public UObject {
 
@@ -205,18 +205,18 @@
     void reset();
     
     /**
-     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     * ICU "poor man's RTTI", returns a UClassID for this class.
      *
-     * @draft ICU 2.2
+     * @draft ICU 2.4
      */
-    virtual inline UClassID getDynamicClassID() const;
+    static UClassID getStaticClassID();
 
     /**
-     * ICU "poor man's RTTI", returns a UClassID for this class.
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
      *
-     * @draft ICU 2.2
+     * @draft ICU 2.4
      */
-    static inline UClassID getStaticClassID();
+    virtual UClassID getDynamicClassID() const;
 
     // ======================= PRIVATES ===========================
     
@@ -270,22 +270,7 @@
      */
     virtual void loadRange(int32_t range);
 
-private:
-
-    /**
-     * The address of this static class variable serves as this class's ID
-     * for ICU "poor man's RTTI".
-     */
-    static const char fgClassID;
 };
-
-inline UClassID
-UnicodeSetIterator::getStaticClassID()
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-UnicodeSetIterator::getDynamicClassID() const
-{ return UnicodeSetIterator::getStaticClassID(); }
 
 inline UBool UnicodeSetIterator::isString() const {
     return codepoint == (UChar32)IS_STRING;

Index: ustring.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/ustring.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- ustring.h	10 Sep 2003 02:42:05 -0000	1.4
+++ ustring.h	6 Apr 2004 10:08:09 -0000	1.5
@@ -453,7 +453,7 @@
  *
  * @return <0 or 0 or >0 as usual for string comparisons
  *
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI int32_t U_EXPORT2
 u_strCompare(const UChar *s1, int32_t length1,
@@ -488,7 +488,7 @@
 /**
  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
  * Compare strings in code point order instead of code unit order.
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 #define U_COMPARE_CODE_POINT_ORDER  0x8000
 #endif
@@ -531,7 +531,7 @@
  *
  * @return <0 or 0 or >0 as usual for string comparisons
  *
- * @draft ICU 2.2
+ * @stable ICU 2.2
  */
 U_CAPI int32_t U_EXPORT2
 u_strCaseCompare(const UChar *s1, int32_t length1,

Index: utf.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/utf.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- utf.h	10 Sep 2003 02:42:05 -0000	1.4
+++ utf.h	6 Apr 2004 10:08:09 -0000	1.5
@@ -258,6 +258,22 @@
          !U_IS_UNICODE_NONCHAR(c)))
 
 /**
+ * Is this code point a BMP code point (U+0000..U+ffff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @draft ICU 2.8
+ */
+#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff)
+
+/**
+ * Is this code point a supplementary code point (U+10000..U+10ffff)?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @draft ICU 2.8
+ */
+#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff)
+
+/**
  * Is this code point a lead surrogate (U+d800..U+dbff)?
  * @param c 32-bit code point
  * @return TRUE or FALSE

Index: utf16.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/utf16.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- utf16.h	10 Sep 2003 02:42:05 -0000	1.3
+++ utf16.h	6 Apr 2004 10:08:09 -0000	1.4
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1999-2001, International Business Machines
+*   Copyright (C) 1999-2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************

Index: utf8.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/utf8.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- utf8.h	10 Sep 2003 02:42:05 -0000	1.4
+++ utf8.h	6 Apr 2004 10:08:09 -0000	1.5
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1999-2001, International Business Machines
+*   Copyright (C) 1999-2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@@ -156,7 +156,7 @@
  * byte sequence.
  * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param i string offset
  * @param c output UChar32 variable
  * @see U8_GET
@@ -178,7 +178,7 @@
  * c is set to a negative value.
  * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param start starting string offset
  * @param i string offset, start<=i<length
  * @param length string length
@@ -205,7 +205,7 @@
  * The result is undefined if the offset points to a trail byte
  * or an illegal UTF-8 sequence.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param i string offset
  * @param c output UChar32 variable
  * @see U8_NEXT
@@ -241,7 +241,7 @@
  * If the offset points to a trail byte or an illegal UTF-8 sequence, then
  * c is set to a negative value.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param i string offset, i<length
  * @param length string length
  * @param c output UChar32 variable, set to <0 in case of an error
@@ -250,9 +250,9 @@
  */
 #define U8_NEXT(s, i, length, c) { \
     (c)=(s)[(i)++]; \
-    if((c)>=0x80) { \
+    if(((uint8_t)(c))>=0x80) { \
         if(U8_IS_LEAD(c)) { \
-            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, -1); \
+            (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (int32_t)(length), c, -1); \
         } else { \
             (c)=U_SENTINEL; \
         } \
@@ -266,7 +266,7 @@
  * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
  * Otherwise, the result is undefined.
  *
- * @param s const UChar * string buffer
+ * @param s const uint8_t * string buffer
  * @param i string offset
  * @param c code point to append
  * @see U8_APPEND
@@ -300,7 +300,7 @@
  * If the code point is not valid or trail bytes do not fit,
  * then isError is set to TRUE.
  *
- * @param s const UChar * string buffer
+ * @param s const uint8_t * string buffer
  * @param i string offset, i<length
  * @param length size of the string buffer
  * @param c code point to append
@@ -321,7 +321,7 @@
  * (Post-incrementing iteration.)
  * "Unsafe" macro, assumes well-formed UTF-8.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param i string offset
  * @see U8_FWD_1
  * @draft ICU 2.4
@@ -335,7 +335,7 @@
  * (Post-incrementing iteration.)
  * "Safe" macro, checks for illegal sequences and for string boundaries.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param i string offset, i<length
  * @param length string length
  * @see U8_FWD_1_UNSAFE
@@ -361,7 +361,7 @@
  * (Post-incrementing iteration.)
  * "Unsafe" macro, assumes well-formed UTF-8.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param i string offset
  * @param n number of code points to skip
  * @see U8_FWD_N
@@ -381,7 +381,7 @@
  * (Post-incrementing iteration.)
  * "Safe" macro, checks for illegal sequences and for string boundaries.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param i string offset, i<length
  * @param length string length
  * @param n number of code points to skip
@@ -404,7 +404,7 @@
  * Otherwise, it is not modified.
  * "Unsafe" macro, assumes well-formed UTF-8.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param i string offset
  * @see U8_SET_CP_START
  * @draft ICU 2.4
@@ -421,7 +421,7 @@
  * Otherwise, it is not modified.
  * "Safe" macro, checks for illegal sequences and for string boundaries.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param start starting string offset (usually 0)
  * @param i string offset, start<=i
  * @see U8_SET_CP_START_UNSAFE
@@ -448,7 +448,7 @@
  * will be returned as the code point.
  * The result is undefined if the offset is behind an illegal UTF-8 sequence.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param i string offset
  * @param c output UChar32 variable
  * @see U8_PREV
@@ -489,7 +489,7 @@
  * will be returned as the code point.
  * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param start starting string offset (usually 0)
  * @param i string offset, start<=i
  * @param c output UChar32 variable, set to <0 in case of an error
@@ -513,7 +513,7 @@
  * The input offset may be the same as the string length.
  * "Unsafe" macro, assumes well-formed UTF-8.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param i string offset
  * @see U8_BACK_1
  * @draft ICU 2.4
@@ -528,7 +528,7 @@
  * The input offset may be the same as the string length.
  * "Safe" macro, checks for illegal sequences and for string boundaries.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param start starting string offset (usually 0)
  * @param i string offset, start<=i
  * @see U8_BACK_1_UNSAFE
@@ -547,7 +547,7 @@
  * The input offset may be the same as the string length.
  * "Unsafe" macro, assumes well-formed UTF-8.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param i string offset
  * @param n number of code points to skip
  * @see U8_BACK_N
@@ -568,7 +568,7 @@
  * The input offset may be the same as the string length.
  * "Safe" macro, checks for illegal sequences and for string boundaries.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param start index of the start of the string
  * @param i string offset, i<length
  * @param n number of code points to skip
@@ -591,7 +591,7 @@
  * The input offset may be the same as the string length.
  * "Unsafe" macro, assumes well-formed UTF-8.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param i string offset
  * @see U8_SET_CP_LIMIT
  * @draft ICU 2.4
@@ -609,7 +609,7 @@
  * The input offset may be the same as the string length.
  * "Safe" macro, checks for illegal sequences and for string boundaries.
  *
- * @param s const UChar * string
+ * @param s const uint8_t * string
  * @param start starting string offset (usually 0)
  * @param i string offset, start<=i<=length
  * @param length string length

Index: utypes.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/utypes.h,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -d -r1.9 -r1.10
--- utypes.h	10 Sep 2003 02:42:05 -0000	1.9
+++ utypes.h	6 Apr 2004 10:08:09 -0000	1.10
@@ -528,9 +528,9 @@
     U_MEMORY_ALLOCATION_ERROR =  7,     /**< Memory allocation error */
     U_INDEX_OUTOFBOUNDS_ERROR =  8,     /**< Trying to access the index that is out of bounds */
     U_PARSE_ERROR             =  9,     /**< Equivalent to Java ParseException */
-    U_INVALID_CHAR_FOUND      = 10,     /**< In the Character conversion routines: Invalid character or sequence was encountered. In other APIs: Invalid character or code point name. */
-    U_TRUNCATED_CHAR_FOUND    = 11,     /**< In the Character conversion routines: More bytes are required to complete the conversion successfully */
-    U_ILLEGAL_CHAR_FOUND      = 12,     /**< In codeset conversion: a sequence that does NOT belong in the codepage has been encountered */
+    U_INVALID_CHAR_FOUND      = 10,     /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */
+    U_TRUNCATED_CHAR_FOUND    = 11,     /**< Character conversion: Incomplete input sequence. */
+    U_ILLEGAL_CHAR_FOUND      = 12,     /**< Character conversion: Illegal input sequence/combination of input units. */
     U_INVALID_TABLE_FORMAT    = 13,     /**< Conversion table file found, but corrupted */
     U_INVALID_TABLE_FILE      = 14,     /**< Conversion table file not found */
     U_BUFFER_OVERFLOW_ERROR   = 15,     /**< A result would not fit in the supplied buffer */
@@ -546,6 +546,7 @@
                                              It is very possible that a circular alias definition has occured */
     U_ENUM_OUT_OF_SYNC_ERROR  = 25,     /**< UEnumeration out of sync with underlying collection */
     U_INVARIANT_CONVERSION_ERROR = 26,  /**< Unable to convert a UChar* string to char* with the invariant converter. */
+    U_INVALID_STATE_ERROR     = 27,     /**< Requested operation can not be completed with ICU in its current state */
 
     U_STANDARD_ERROR_LIMIT,             /**< This must always be the last value to indicate the limit for standard errors */
     /*
@@ -624,6 +625,7 @@
     U_BRK_UNDEFINED_VARIABLE,              /**< Use of an undefined $Variable in an RBBI rule.    */
     U_BRK_INIT_ERROR,                      /**< Initialization failure.  Probable missing ICU Data. */
     U_BRK_RULE_EMPTY_SET,                  /**< Rule contains an empty Unicode Set.               */
+    U_BRK_UNRECOGNIZED_OPTION,             /**< !!option in RBBI rules not recognized.            */
     U_BRK_ERROR_LIMIT,                     /**< This must always be the last value to indicate the limit for Break Iterator failures */
 
     /*
@@ -650,16 +652,23 @@
       * The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes
       */
       U_IDNA_ERROR_START=0x10400,
-      U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR,
-      U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR,
+      U_IDNA_PROHIBITED_ERROR,
+      U_IDNA_UNASSIGNED_ERROR,
       U_IDNA_CHECK_BIDI_ERROR,
       U_IDNA_STD3_ASCII_RULES_ERROR,
       U_IDNA_ACE_PREFIX_ERROR,
       U_IDNA_VERIFICATION_ERROR,
       U_IDNA_LABEL_TOO_LONG_ERROR,
       U_IDNA_ERROR_LIMIT,
+      /*
+       * Aliases for StringPrep
+       */
+      U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
+      U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
+      U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
+      
 
-    U_ERROR_LIMIT=U_IDNA_ERROR_LIMIT      /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
+      U_ERROR_LIMIT=U_IDNA_ERROR_LIMIT      /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */
 } UErrorCode;
 
 /* Use the following to determine if an UErrorCode represents */

Index: uversion.h
===================================================================
RCS file: /cvs/core/icu-sword/source/common/unicode/uversion.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- uversion.h	10 Sep 2003 02:42:05 -0000	1.5
+++ uversion.h	6 Apr 2004 10:08:09 -0000	1.6
@@ -61,7 +61,7 @@
  *  This value will change in the subsequent releases of ICU
  *  @draft ICU 2.6
  */
-#define U_ICU_VERSION_MINOR_NUM 6
+#define U_ICU_VERSION_MINOR_NUM 8
 
 /** The current ICU patchlevel version as an integer.  
  *  This value will change in the subsequent releases of ICU
@@ -73,20 +73,20 @@
  *  This value will change in the subsequent releases of ICU
  *  @draft ICU 2.6
  */
-#define U_ICU_VERSION_SUFFIX _2_6
+#define U_ICU_VERSION_SUFFIX _2_8
 
 /** The current ICU library version as a dotted-decimal string. The patchlevel
  *  only appears in this string if it non-zero. 
  *  This value will change in the subsequent releases of ICU
  *  @draft ICU 2.4
  */
-#define U_ICU_VERSION "2.6"
+#define U_ICU_VERSION "2.8"
 
 /** The current ICU library major/minor version as a string without dots, for library name suffixes. 
  *  This value will change in the subsequent releases of ICU
  *  @draft ICU 2.6
  */
-#define U_ICU_VERSION_SHORT "26"
+#define U_ICU_VERSION_SHORT "28"
 
 /** An ICU version consists of up to 4 numbers from 0..255.
  *  @draft ICU 2.4
@@ -109,7 +109,7 @@
 typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
 
 #if U_HAVE_NAMESPACE && defined(XP_CPLUSPLUS)
-#define U_ICU_NAMESPACE icu_2_6
+#define U_ICU_NAMESPACE icu_2_8
 namespace U_ICU_NAMESPACE { }
 namespace icu = U_ICU_NAMESPACE;
 U_NAMESPACE_USE
@@ -174,21 +174,23 @@
  * compression, generating quad level always when strength is quad or more 
  * version 4 - ICU 2.2 - tracking UCA changes, ignore completely ignorables 
  * in contractions, ignore primary ignorables after shifted 
+ * version 5 - ICU 2.8 - changed implicit generation code
  * This value may change in the subsequent releases of ICU
  * @draft ICU 2.4
  */
-#define UCOL_RUNTIME_VERSION 4
+#define UCOL_RUNTIME_VERSION 5
 
 /** Builder code version. When this is different, same tailoring might result
  * in assigning different collation elements to code points                  
  * version 2 was in ICU 1.8.1. added support for prefixes, tweaked canonical 
  * closure. However, the tailorings should probably get same CEs assigned    
  * version 5 - ICU 2.2 - fixed some bugs, renamed some indirect values.      
+ * version 6 - ICU 2.8 - fixed bug in builder that allowed 0xFF in primary values
  * Backward compatible with the old rules. 
  * This value may change in the subsequent releases of ICU
  * @draft ICU 2.4
  */
-#define UCOL_BUILDER_VERSION 5
+#define UCOL_BUILDER_VERSION 6
 
 /** *** Removed *** Instead we use the data we read from FractionalUCA.txt
  * This is the version of FractionalUCA.txt tailoring rules