[sword-cvs] icu-sword/source/i18n/unicode calendar.h,1.3,1.4 choicfmt.h,1.2,1.3 coleitr.h,1.3,1.4 coll.h,1.3,1.4 datefmt.h,1.3,1.4 dcfmtsym.h,1.3,1.4 decimfmt.h,1.6,1.7 dtfmtsym.h,1.2,1.3 fieldpos.h,1.2,1.3 fmtable.h,1.2,1.3 format.h,1.2,1.3 gregocal.h,1.3,1.4 msgfmt.h,1.2,1.3 numfmt.h,1.3,1.4 rbnf.h,1.3,1.4 regex.h,NONE,1.1 search.h,1.4,1.5 simpletz.h,1.2,1.3 smpdtfmt.h,1.3,1.4 sortkey.h,1.3,1.4 stsearch.h,1.4,1.5 tblcoll.h,1.3,1.4 timezone.h,1.3,1.4 translit.h,1.4,1.5 ucal.h,1.3,1.4 ucol.h,1.4,1.5 ucoleitr.h,1.3,1.4 ucurr.h,NONE,1.1 udat.h,1.4,1.5 umsg.h,1.3,1.4 unifltlg.h,1.3,1.4 unirepl.h,NONE,1.1 unum.h,1.4,1.5 usearch.h,1.3,1.4 utrans.h,1.4,1.5

sword@www.crosswire.org sword@www.crosswire.org
Tue, 9 Sep 2003 19:43:37 -0700


Update of /usr/local/cvsroot/icu-sword/source/i18n/unicode
In directory www:/tmp/cvs-serv19862/source/i18n/unicode

Added Files:
	calendar.h choicfmt.h coleitr.h coll.h datefmt.h dcfmtsym.h 
	decimfmt.h dtfmtsym.h fieldpos.h fmtable.h format.h gregocal.h 
	msgfmt.h numfmt.h rbnf.h regex.h search.h simpletz.h 
	smpdtfmt.h sortkey.h stsearch.h tblcoll.h timezone.h 
	translit.h ucal.h ucol.h ucoleitr.h ucurr.h udat.h umsg.h 
	unifltlg.h unirepl.h unum.h usearch.h utrans.h 
Log Message:
ICU 2.6 commit
















--- NEW FILE: regex.h ---
/*
**********************************************************************
*   Copyright (C) 2002-2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   file name:  regex.h
*   encoding:   US-ASCII
*   indentation:4
*
*   created on: 2002oct22
*   created by: Andy Heninger
*
*   ICU Regular Expressions, API for C++
*/

#ifndef REGEX_H
#define REGEX_H


/**
 * \file
 * \brief  C++ API:  Regular Expressions
 *
 * <h2>Regular Expression API</h2>
 *
 * <p>The ICU API for processing regular expressions consists of two classes,
 *  <code>RegexPattern</code> and <code>RegexMatcher</code>.
 *  <code>RegexPattern</code> objects represent a pre-processed, or compiled
 *  regular expression.  They are created from a regular expression pattern string,
 *  and can be used to create <code>RegexMatcher</code> objects for the pattern.</p>
 *
 * <p>Class <code>RegexMatcher</code> bundles together a regular expression
 *  pattern and a target string to which the search pattern will be applied.
 *  <code>RegexMatcher</code> includes API for doing plain find or search
 *  operations, for search and replace operations, and for obtaining detailed
 *  information about bounds of a match. </p>
 */

#include "unicode/utypes.h"

#if !UCONFIG_NO_REGULAR_EXPRESSIONS

#include "unicode/uobject.h"
#include "unicode/unistr.h"
#include "unicode/parseerr.h"

U_NAMESPACE_BEGIN


// Forward Declarations...

class RegexMatcher;
class UVector;
class UVector32;
class UnicodeSet;
struct REStackFrame;
struct Regex8BitSet;


/**
 * Match mode flags for regular expressions, listed in ascending bit order.
 * Every constant occupies its own bit.
 * @draft ICU 2.4
 */
enum {
    /** Match without regard to case.  @draft ICU 2.4 */
    UREGEX_CASE_INSENSITIVE = 0x02,

    /** Permit white space and comments inside a pattern.  @draft ICU 2.4 */
    UREGEX_COMMENTS         = 0x04,

    /** Governs "$" and "^":  when set, line terminators inside the string
      * are recognized; when clear, they match only at the start and end
      * of the input string.
      * @draft ICU 2.4 */
    UREGEX_MULTILINE        = 0x08,

    /** When set, '.' also matches line terminators; when clear, '.' matching
      * stops at the end of a line.
      * @draft ICU 2.4 */
    UREGEX_DOTALL           = 0x20,

    /** Forces normalization of pattern and strings.  @draft ICU 2.4 */
    UREGEX_CANON_EQ         = 0x80
};



/**
  * Class <code>RegexPattern</code> represents a compiled regular expression.  It includes
  * factory methods for creating a RegexPattern object from the source (string) form
  * of a regular expression, methods for creating RegexMatchers that allow the pattern
  * to be applied to input text, and a few convenience methods for simple common
  * uses of regular expressions.
  *
  * <p>Class RegexPattern is not intended to be subclassed.</p>
  *
  * @draft ICU 2.4
  */
class U_I18N_API RegexPattern: public UObject {
public:

    /**
      * Default constructor.  Create a RegexPattern object that refers to no actual
      *   pattern.  Not normally needed; RegexPattern objects are usually
      *   created using the factory method <code>compile()</code>.
      *
      * @draft ICU 2.4
      */
    RegexPattern();

    /**
      * Copy Constructor.  Create a new RegexPattern object that is equivalent
      *                    to the source object.
      * @param source the RegexPattern object to be copied.
      * @draft ICU 2.4
      */
    RegexPattern(const RegexPattern &source);

    /**
      * Destructor.  Note that a RegexPattern object must persist so long as any
      *  RegexMatcher objects that were created from the RegexPattern are active.
      * @draft ICU 2.4
      */
    virtual ~RegexPattern();

    /**
      * Equality operator.  Two RegexPattern objects are considered equal if they
      * were constructed from identical source patterns using the same match flag
      * settings.
      * @param that a RegexPattern object to compare with "this".
      * @return TRUE if the objects are equivalent.
      * @draft ICU 2.4
      */
    UBool           operator==(const RegexPattern& that) const;

    /**
      * Inequality operator.  Defined as the negation of <code>operator==</code>:
      * two RegexPattern objects are considered equal if they
      * were constructed from identical source patterns using the same match flag
      * settings.
      * @param that a RegexPattern object to compare with "this".
      * @return TRUE if the objects are different.
      * @draft ICU 2.4
      */
    inline UBool    operator!=(const RegexPattern& that) const {return ! operator ==(that);};

    /**
     * Assignment operator.  After assignment, this RegexPattern will behave identically
     *     to the source object.
     * @param source the RegexPattern object to assign from.
     * @draft ICU 2.4
     */
    RegexPattern  &operator =(const RegexPattern &source);

    /**
     * Create an exact copy of this RegexPattern object.  Since RegexPattern is not
     * intended to be subclassed, <code>clone()</code> and the copy construction are
     * equivalent operations.
     * @return the copy of this RegexPattern
     * @draft ICU 2.4
     */
    virtual RegexPattern  *clone() const;


   /**
    *     Compiles the regular expression in string form into a RegexPattern
    *     object.  These compile methods, rather than the constructors, are the usual
    *     way that RegexPattern objects are created.
    *
    *     <p>Note that RegexPattern objects must not be deleted while RegexMatcher
    *     objects created from the pattern are active.  RegexMatchers keep a pointer
    *     back to their pattern, so premature deletion of the pattern is a
    *     catastrophic error.</p>
    *
    *     <p>All pattern match mode flags are set to their default values.</p>
    *
    *    @param regex The regular expression to be compiled.
    *    @param pe    Receives the position (line and column numbers) of any error
    *                 within the regular expression.
    *    @param status A reference to a UErrorCode to receive any errors.
    *    @return      A RegexPattern object for the compiled pattern.
    *
    *    @draft ICU 2.4
    */
    static RegexPattern *compile( const UnicodeString &regex,
        UParseError          &pe,
        UErrorCode           &status);

   /**
    *     Compiles the regular expression in string form into a RegexPattern
    *     object using the specified match mode flags.  These compile methods,
    *     rather than the constructors, are the usual way that RegexPattern objects
    *     are created.
    *
    *     <p>Note that RegexPattern objects must not be deleted while RegexMatcher
    *     objects created from the pattern are active.  RegexMatchers keep a pointer
    *     back to their pattern, so premature deletion of the pattern is a
    *     catastrophic error.</p>
    *
    *    @param regex The regular expression to be compiled.
    *    @param flags The match mode flags to be used.
    *    @param pe    Receives the position (line and column numbers) of any error
    *                 within the regular expression.
    *    @param status   A reference to a UErrorCode to receive any errors.
    *    @return      A RegexPattern object for the compiled pattern.
    *
    *    @draft ICU 2.4
    */
    static RegexPattern *compile( const UnicodeString &regex,
        uint32_t             flags,
        UParseError          &pe,
        UErrorCode           &status);


   /**
    *     Compiles the regular expression in string form into a RegexPattern
    *     object using the specified match mode flags.  Unlike the other compile
    *     overloads, this one takes no UParseError parameter.  These compile methods,
    *     rather than the constructors, are the usual way that RegexPattern objects
    *     are created.
    *
    *     <p>Note that RegexPattern objects must not be deleted while RegexMatcher
    *     objects created from the pattern are active.  RegexMatchers keep a pointer
    *     back to their pattern, so premature deletion of the pattern is a
    *     catastrophic error.</p>
    *
    *    @param regex The regular expression to be compiled.
    *    @param flags The match mode flags to be used.
    *    @param status   A reference to a UErrorCode to receive any errors.
    *    @return      A RegexPattern object for the compiled pattern.
    *
    *    @draft ICU 2.6
    */
    static RegexPattern *compile( const UnicodeString &regex,
        uint32_t             flags,
        UErrorCode           &status);


   /**
    *     Get the match mode flags that were used when compiling this pattern.
    *     @return  the match mode flags
    *     @draft ICU 2.4
    */
    virtual uint32_t flags() const;

   /**
    *  Creates a RegexMatcher that will match the given input against this pattern.  The
    *   RegexMatcher can then be used to perform match, find or replace operations
    *   on the input.  Note that a RegexPattern object must not be deleted while
    *   RegexMatchers created from it still exist and might possibly be used again.
    *
    *   @param input The input string to which the regular expression will be applied.
    *   @param status   A reference to a UErrorCode to receive any errors.
    *   @return      A RegexMatcher object for this pattern and input.
    *
    *   @draft ICU 2.4
    */
    virtual RegexMatcher *matcher(const UnicodeString &input,
        UErrorCode          &status) const;


   /**
    *  Creates a RegexMatcher that will match against this pattern.  The
    *   RegexMatcher can be used to perform match, find or replace operations.
    *   Note that a RegexPattern object must not be deleted while
    *   RegexMatchers created from it still exist and might possibly be used again.
    *
    *   @param status   A reference to a UErrorCode to receive any errors.
    *   @return      A RegexMatcher object for this pattern.
    *
    *   @draft ICU 2.6
    */
    virtual RegexMatcher *matcher(UErrorCode  &status) const;


   /**
    *  Test whether a string matches a regular expression.  This convenience function
    *   both compiles the regular expression and applies it in a single operation.
    *   Note that if the same pattern needs to be applied repeatedly, this method will be
    *   less efficient than creating and reusing a RegexPattern object.
    *
    *  @param regex The regular expression
    *  @param input The string data to be matched
    *  @param pe Receives the position of any syntax errors within the regular expression
    *  @param status A reference to a UErrorCode to receive any errors.
    *  @return True if the regular expression exactly matches the full input string.
    *
    *  @draft ICU 2.4
    */
    static UBool matches(const UnicodeString   &regex,
        const UnicodeString   &input,
        UParseError     &pe,
        UErrorCode      &status);


   /**
    *    Returns the regular expression from which this pattern was compiled.
    *    @return the source regular expression, returned by value.
    *    @draft ICU 2.4
    */
    virtual UnicodeString pattern() const;


    /**
     * Split a string into fields.  Somewhat like split() from Perl.
     * The pattern matches identify delimiters that separate the input
     *  into fields.  The input data between the matches becomes the
     *  fields themselves.
     * <p>
     *  For the best performance on split() operations,
     *  <code>RegexMatcher::split</code> is preferable to this function.
     *
     * @param input   The string to be split into fields.  The field delimiters
     *                match the pattern (in the "this" object)
     * @param dest    An array of UnicodeStrings to receive the results of the split.
     *                This is an array of actual UnicodeString objects, not an
     *                array of pointers to strings.  Local (stack based) arrays can
     *                work well here.
     * @param destCapacity  The number of elements in the destination array.
     *                If the number of fields found is less than destCapacity, the
     *                extra strings in the destination array are not altered.
     *                If the number of destination strings is less than the number
     *                of fields, the trailing part of the input string, including any
     *                field delimiters, is placed in the last destination string.
     * @param status  A reference to a UErrorCode to receive any errors.
     * @return        The number of fields into which the input string was split.
     * @draft ICU 2.4
     */
    virtual int32_t  split(const UnicodeString &input,
        UnicodeString    dest[],
        int32_t          destCapacity,
        UErrorCode       &status) const;



    /**
     *   dump   Debug function, displays the compiled form of a pattern.
     *   @internal
     */
    void dump() const;

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     *
     * @draft ICU 2.4
     */
    virtual inline UClassID getDynamicClassID() const; 

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     *
     * @draft ICU 2.4
     */
    static inline UClassID getStaticClassID(); 

private:
    //
    //  Implementation Data
    //
    UnicodeString   fPattern;      // The original pattern string.
    uint32_t        fFlags;        // The flags used when compiling the pattern.
                                   //
    UVector32       *fCompiledPat; // The compiled pattern p-code.
    UnicodeString   fLiteralText;  // Any literal string data from the pattern,
                                   //   after un-escaping, for use during the match.

    UVector         *fSets;        // Any UnicodeSets referenced from the pattern.
    Regex8BitSet    *fSets8;       //      (and fast sets for latin-1 range.)


    UErrorCode      fDeferredStatus; // status if some prior error has left this
                                   //  RegexPattern in an unusable state.

    int32_t         fMinMatchLen;  // Minimum Match Length.  All matches will have length
                                   //   >= this value.  For some patterns, this calculated
                                   //   value may be less than the true shortest
                                   //   possible match.

    int32_t         fFrameSize;    // Size of a state stack frame in the
                                   //   execution engine.

    int32_t         fDataSize;     // The size of the data needed by the pattern that
                                   //   does not go on the state stack, but has just
                                   //   a single copy per matcher.

    UVector32       *fGroupMap;    // Map from capture group number to position of
                                   //   the group's variables in the matcher stack frame.

    int32_t         fMaxCaptureDigits;

    UnicodeSet     **fStaticSets;  // Ptr to static (shared) sets for predefined
                                   //   regex character classes, e.g. Word.

    Regex8BitSet   *fStaticSets8;  // Ptr to the static (shared) latin-1 only
                                   //  sets for predefined regex classes.

    int32_t         fStartType;    // Info on how a match must start.
    int32_t         fInitialStringIdx;     //  
    int32_t         fInitialStringLen;
    UnicodeSet     *fInitialChars;  
    UChar32         fInitialChar;
    Regex8BitSet   *fInitialChars8;

    /**
     * The address of this static class variable serves as this class's ID
     * for ICU "poor man's RTTI".
     */
    static const char fgClassID;

    // RegexCompile and RegexMatcher are friends, giving them access to the
    // private implementation data above.
    friend class RegexCompile;
    friend class RegexMatcher;

    //
    //  Implementation Methods
    //
    void        init();            // Common initialization, for use by constructors.
    void        zap();             // Common cleanup
    void        dumpOp(int32_t index) const;


};









/**
 *  class RegexMatcher bundles together a regular expression pattern and
 *  input text to which the expression can be applied.  It includes methods
 *  for testing for matches, and for find and replace operations.
 *
 * <p>Class RegexMatcher is not intended to be subclassed.</p>
 *
 * @draft ICU 2.4
 */
class U_I18N_API RegexMatcher: public UObject {
public:

    /**
      * Construct a RegexMatcher for a regular expression.
      * This is a convenience method that avoids the need to explicitly create
      * a RegexPattern object.  Note that if several RegexMatchers need to be
      * created for the same expression, it will be more efficient to
      * separately create and cache a RegexPattern object, and use
      * its matcher() method to create the RegexMatcher objects.
      *
      *  @param regexp The Regular Expression to be compiled.
      *  @param flags  Regular expression options, such as case insensitive matching.
      *                @see UREGEX_CASE_INSENSITIVE
      *  @param status Any errors are reported by setting this UErrorCode variable.
      *  @draft ICU 2.6
      */
    RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status);

    /**
      * Construct a RegexMatcher for a regular expression and an input string.
      * This is a convenience method that avoids the need to explicitly create
      * a RegexPattern object.  Note that if several RegexMatchers need to be
      * created for the same expression, it will be more efficient to
      * separately create and cache a RegexPattern object, and use
      * its matcher() method to create the RegexMatcher objects.
      *
      *  @param regexp The Regular Expression to be compiled.
      *  @param input  The string to match
      *  @param flags  Regular expression options, such as case insensitive matching.
      *                @see UREGEX_CASE_INSENSITIVE
      *  @param status Any errors are reported by setting this UErrorCode variable.
      *  @draft ICU 2.6
      */
    RegexMatcher(const UnicodeString &regexp, const UnicodeString &input,
        uint32_t flags, UErrorCode &status);


   /**
    *   Destructor.
    *
    *  @draft ICU 2.4
    */
    virtual ~RegexMatcher();


   /**
    *   Attempts to match the entire input string against the pattern.
    *    @param   status     A reference to a UErrorCode to receive any errors.
    *    @return TRUE if there is a match
    *    @draft ICU 2.4
    */
    virtual UBool matches(UErrorCode &status);



   /**
    *   Attempts to match the input string, starting from the beginning, against the pattern.
    *   Like the matches() method, this function always starts at the beginning of the input string;
    *   unlike that function, it does not require that the entire input string be matched.
    *
    *   <p>If the match succeeds then more information can be obtained via the <code>start()</code>,
    *     <code>end()</code>, and <code>group()</code> functions.</p>
    *
    *    @param   status     A reference to a UErrorCode to receive any errors.
    *    @return  TRUE if there is a match at the start of the input string.
    *    @draft ICU 2.4
    */
    virtual UBool lookingAt(UErrorCode &status);


   /**
    *  Find the next pattern match in the input string.
    *  The find begins searching the input at the location following the end of
    *  the previous match, or at the start of the string if there is no previous match.
    *  If a match is found, <code>start(), end()</code> and <code>group()</code>
    *  will provide more information regarding the match.
    *  <p>Note that if the input string is changed by the application,
    *     use find(startPos, status) instead of find(), because the saved starting
    *     position may not be valid with the altered input string.</p>
    *  @return  TRUE if a match is found.
    *  @draft ICU 2.4
    */
    virtual UBool find();


   /**
    *   Resets this RegexMatcher and then attempts to find the next substring of the
    *   input string that matches the pattern, starting at the specified index.
    *
    *   @param   start     the position in the input string to begin the search
    *   @param   status    A reference to a UErrorCode to receive any errors.
    *   @return  TRUE if a match is found.
    *   @draft ICU 2.4
    */
    virtual UBool find(int32_t start, UErrorCode &status);


   /**
    *   Returns a string containing the text matched by the previous match.
    *   If the pattern can match an empty string, an empty string may be returned.
    *   @param   status      A reference to a UErrorCode to receive any errors.
    *                        Possible errors are  U_REGEX_INVALID_STATE if no match
    *                        has been attempted or the last match failed.
    *   @return  a string containing the matched input text.
    *   @draft ICU 2.4
    */
    virtual UnicodeString group(UErrorCode &status) const;


   /**
    *    Returns a string containing the text captured by the given group
    *    during the previous match operation.  Group(0) is the entire match.
    *
    *    @param groupNum the capture group number
    *    @param   status     A reference to a UErrorCode to receive any errors.
    *                        Possible errors are  U_REGEX_INVALID_STATE if no match
    *                        has been attempted or the last match failed and
    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number.
    *    @return the captured text
    *    @draft ICU 2.4
    */
    virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;


   /**
    *   Returns the number of capturing groups in this matcher's pattern.
    *   @return the number of capture groups
    *   @draft ICU 2.4
    */
    virtual int32_t groupCount() const;


   /**
    *   Returns the index in the input string of the start of the text matched
    *   during the previous match operation.
    *    @param   status      a reference to a UErrorCode to receive any errors.
    *    @return              The position in the input string of the start of the last match.
    *    @draft ICU 2.4
    */
    virtual int32_t start(UErrorCode &status) const;


   /**
    *   Returns the index in the input string of the start of the text matched by the
    *    specified capture group during the previous match operation.  Return -1 if
    *    the capture group exists in the pattern, but was not part of the last match.
    *
    *    @param  group       the capture group number
    *    @param  status      A reference to a UErrorCode to receive any errors.  Possible
    *                        errors are  U_REGEX_INVALID_STATE if no match has been
    *                        attempted or the last match failed, and
    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
    *    @return the start position of substring matched by the specified group.
    *    @draft ICU 2.4
    */
    virtual int32_t start(int group, UErrorCode &status) const;


   /**
    *    Returns the index in the input string of the character following the
    *    text matched during the previous match operation.
    *   @param   status      A reference to a UErrorCode to receive any errors.  Possible
    *                        errors are  U_REGEX_INVALID_STATE if no match has been
    *                        attempted or the last match failed.
    *    @return the index of the last character matched, plus one.
    *   @draft ICU 2.4
    */
    virtual int32_t end(UErrorCode &status) const;


   /**
    *    Returns the index in the input string of the character following the
    *    text matched by the specified capture group during the previous match operation.
    *    @param group  the capture group number
    *    @param   status      A reference to a UErrorCode to receive any errors.  Possible
    *                        errors are  U_REGEX_INVALID_STATE if no match has been
    *                        attempted or the last match failed and
    *                        U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number
    *    @return  the index of the last character, plus one, of the text
    *              captured by the specified group during the previous match operation.
    *              Return -1 if the capture group was not part of the match.
    *    @draft ICU 2.4
    */
    virtual int32_t end(int group, UErrorCode &status) const;


   /**
    *   Resets this matcher.  The effect is to remove any memory of previous matches,
    *       and to cause subsequent find() operations to begin at the beginning of
    *       the input string.
    *
    *   @return this RegexMatcher.
    *   @draft ICU 2.4
    */
    virtual RegexMatcher &reset();


   /**
    *   Resets this matcher with a new input string.  This allows instances of RegexMatcher
    *     to be reused, which is more efficient than creating a new RegexMatcher for
    *     each input string to be processed.
    *   @param input the new input string for this matcher.
    *   @return this RegexMatcher.
    *   @draft ICU 2.4
    */
    virtual RegexMatcher &reset(const UnicodeString &input);


   /**
    *   Returns the input string being matched.  The returned string is not a copy,
    *   but the live input string.  It should not be altered or deleted.
    *   @return the input string
    *   @draft ICU 2.4
    */
    virtual const UnicodeString &input() const;


   /**
    *    Returns the pattern that is interpreted by this matcher.
    *    @return  the RegexPattern for this RegexMatcher
    *    @draft ICU 2.4
    */
    virtual const RegexPattern &pattern() const;


   /**
    *    Replaces every substring of the input that matches the pattern
    *    with the given replacement string.  This is a convenience function that
    *    provides a complete find-and-replace-all operation.
    *
    *    This method first resets this matcher. It then scans the input string
    *    looking for matches of the pattern. Input that is not part of any
    *    match is left unchanged; each match is replaced in the result by the
    *    replacement string. The replacement string may contain references to
    *    capture groups.
    *
    *    @param   replacement a string containing the replacement text.
    *    @param   status      a reference to a UErrorCode to receive any errors.
    *    @return              a string containing the results of the find and replace.
    *    @draft ICU 2.4
    */
    virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);


   /**
    * Replaces the first substring of the input that matches
    * the pattern with the replacement string.   This is a convenience
    * function that provides a complete find-and-replace operation.
    *
    * <p>This function first resets this RegexMatcher. It then scans the input string
    * looking for a match of the pattern. Input that is not part
    * of the match is appended directly to the result string; the match is replaced
    * in the result by the replacement string. The replacement string may contain
    * references to captured groups.</p>
    *
    * <p>The state of the matcher (the position at which a subsequent find()
    *    would begin) after completing a replaceFirst() is not specified.  The
    *    RegexMatcher should be reset before doing additional find() operations.</p>
    *
    *    @param   replacement a string containing the replacement text.
    *    @param   status      a reference to a UErrorCode to receive any errors.
    *    @return              a string containing the results of the find and replace.
    *    @draft ICU 2.4
    */
    virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);

   /**
    *   Implements a replace operation intended to be used as part of an
    *   incremental find-and-replace.
    *
    *   <p>The input string, starting from the end of the previous match and ending at
    *   the start of the current match, is appended to the destination string.  Then the
    *   replacement string is appended to the output string,
    *   including handling any substitutions of captured text.</p>
    *
    *   <p>For simple, prepackaged, non-incremental find-and-replace
    *   operations, see replaceFirst() or replaceAll().</p>
    *
    *   @param   dest        A UnicodeString to which the results of the find-and-replace are appended.
    *   @param   replacement A UnicodeString that provides the text to be substituted for
    *                        the input text that matched the regexp pattern.  The replacement
    *                        text may contain references to captured text from the
    *                        input.
    *   @param   status      A reference to a UErrorCode to receive any errors.  Possible
    *                        errors are  U_REGEX_INVALID_STATE if no match has been
    *                        attempted or the last match failed, and U_INDEX_OUTOFBOUNDS_ERROR
    *                        if the replacement text specifies a capture group that
    *                        does not exist in the pattern.
    *
    *   @return  this  RegexMatcher
    *   @draft ICU 2.4
    *
    */
    virtual RegexMatcher &appendReplacement(UnicodeString &dest,
        const UnicodeString &replacement, UErrorCode &status);


   /**
    * As the final step in a find-and-replace operation, append the remainder
    * of the input string, starting at the position following the last match,
    * to the destination string. <code>appendTail()</code> is intended to be invoked after one
    * or more invocations of the <code>RegexMatcher::appendReplacement()</code>.
    *
    *  @param dest A UnicodeString to which the results of the find-and-replace are appended.
    *  @return  the destination string.
    *  @draft ICU 2.4
    */
    virtual UnicodeString &appendTail(UnicodeString &dest);



    /**
     * Split a string into fields.  Somewhat like split() from Perl.
     * The pattern matches identify delimiters that separate the input
     *  into fields.  The input data between the matches becomes the
     *  fields themselves.
     *
     * @param input   The string to be split into fields.  The field delimiters
     *                match the pattern (in the "this" object).  This matcher
     *                will be reset to this input string.
     * @param dest    An array of UnicodeStrings to receive the results of the split.
     *                This is an array of actual UnicodeString objects, not an
     *                array of pointers to strings.  Local (stack based) arrays can
     *                work well here.
     * @param destCapacity  The number of elements in the destination array.
     *                If the number of fields found is less than destCapacity, the
     *                extra strings in the destination array are not altered.
     *                If the number of destination strings is less than the number
     *                of fields, the trailing part of the input string, including any
     *                field delimiters, is placed in the last destination string.
     * @param status  A reference to a UErrorCode to receive any errors.
     * @return        The number of fields into which the input string was split.
     * @draft ICU 2.6
     */
    virtual int32_t  split(const UnicodeString &input,
        UnicodeString    dest[],
        int32_t          destCapacity,
        UErrorCode       &status);



    /**
     *   setTrace   Debug function, enable/disable tracing of the matching engine.
     *              For internal ICU development use only.  DO NOT USE!
     *   @param state  TRUE to enable tracing, FALSE to disable it.
     *   @internal
     */
    void setTrace(UBool state);


    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     *
     * @draft ICU 2.2
     */
    static inline UClassID getStaticClassID();

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     *
     * @draft ICU 2.2
     */
    virtual inline UClassID getDynamicClassID() const;

private:
    // Constructors and other object boilerplate are private.
    // Instances of RegexMatcher can not be assigned, copied, cloned, etc.
    RegexMatcher(); // default constructor not implemented
    RegexMatcher(const RegexPattern *pat);
    RegexMatcher(const RegexMatcher &other);
    RegexMatcher &operator =(const RegexMatcher &rhs);
    friend class RegexPattern;     // RegexPattern is a friend, so it can reach the private constructors.


    //
    //  MatchAt   This is the internal interface to the match engine itself.
    //            Match status comes back in matcher member variables.
    //
    void                 MatchAt(int32_t startIdx, UErrorCode &status);
    inline void          backTrack(int32_t &inputIdx, int32_t &patIdx);
    UBool                isWordBoundary(int32_t pos);         // perform the \b test
    REStackFrame        *resetStack();
    inline REStackFrame *StateSave(REStackFrame *fp, int32_t savePatIdx,
                                   int32_t frameSize, UErrorCode &status);


    const RegexPattern  *fPattern;         // Pointer back to the pattern for this matcher.
    RegexPattern        *fPatternOwned;    // Non-NULL if this matcher owns the pattern, and
                                           //   should delete it when through.
    const UnicodeString *fInput;           // The input string, held by pointer.

    UBool                fMatch;           // True if the last match was successful.
    int32_t              fMatchStart;      // Position of the start of the most recent match
    int32_t              fMatchEnd;        // First position after the end of the most recent match
    int32_t              fLastMatchEnd;    // First position after the end of the previous match.

    UVector32           *fStack;
    REStackFrame        *fFrame;           // After finding a match, the last active stack
                                           //   frame, which will contain the capture group results.
                                           //   NOT valid while match engine is running.

    int32_t             *fData;            // Data area for use by the compiled pattern.
    int32_t             fSmallData[8];     //   Use this for data if it's enough.

    UBool               fTraceDebug;       // Set true for debug tracing of match engine.

    UErrorCode          fDeferredStatus;   // Saved error state that cannot be immediately
                                           //   reported, or that permanently disables this matcher.

    /**
     * The address of this static class variable serves as this class's ID
     * for ICU "poor man's RTTI".
     */
    static const char   fgClassID;


};

// Class ID of RegexPattern: the address of its static fgClassID member.
inline UClassID RegexPattern::getStaticClassID()
{
    return (UClassID)&fgClassID;
}
// Dynamic class ID of a RegexPattern object: the same value as the static ID.
inline UClassID RegexPattern::getDynamicClassID() const
{
    return RegexPattern::getStaticClassID();
}

// Class ID of RegexMatcher: the address of its static fgClassID member.
inline UClassID RegexMatcher::getStaticClassID()
{
    return (UClassID)&fgClassID;
}
// Dynamic class ID of a RegexMatcher object: the same value as the static ID.
inline UClassID RegexMatcher::getDynamicClassID() const
{
    return RegexMatcher::getStaticClassID();
}


U_NAMESPACE_END
#endif  // UCONFIG_NO_REGULAR_EXPRESSIONS
#endif












--- NEW FILE: ucurr.h ---
/*
**********************************************************************
* Copyright (c) 2002-2003, International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
* $Source: /usr/local/cvsroot/icu-sword/source/i18n/unicode/ucurr.h,v $ 
* $Revision: 1.1 $
**********************************************************************
*/
#ifndef _UCURR_H_
#define _UCURR_H_

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING

/**
 * Opaque key identifying one currency registration.  A value of this
 * type is returned by ucurr_register() and is later passed to
 * ucurr_unregister() to remove that registration.
 * @see ucurr_register
 * @see ucurr_unregister
 */
typedef const void* UCurrRegistryKey;

/**
 * The ucurr API encapsulates information about a currency, as defined by
 * ISO 4217.  A currency is represented by a 3-character string
 * containing its ISO 4217 code.  This API can return various data
 * necessary for the proper display of a currency:
 *
 * <ul><li>A display symbol, for a specific locale
 * <li>The number of fraction digits to display
 * <li>A rounding increment
 * </ul>
 *
 * The <tt>DecimalFormat</tt> class uses these data to display
 * currencies.
 * @author Alan Liu
 * @since ICU 2.2
 */

/**
 * Returns the currency code of the default currency for the given
 * locale.
 * @param locale the locale for which to retrieve a currency code
 * @param ec error code
 * @return a pointer to a 3-character ISO 4217 currency code (a UChar
 * string), or NULL if none is found.
 * @see ucurr_register
 * @draft ICU 2.2
 */
U_CAPI const UChar* U_EXPORT2
ucurr_forLocale(const char* locale,
                UErrorCode* ec);

/**
 * Selector constants for ucurr_getName().
 *
 * @see ucurr_getName
 * @draft ICU 2.6
 */
typedef enum UCurrNameStyle {
    /**
     * Selector for ucurr_getName indicating a symbolic name for a
     * currency, such as "$" for USD.
     * @draft ICU 2.6
     */
    UCURR_SYMBOL_NAME,

    /**
     * Selector for ucurr_getName indicating the long name for a
     * currency, such as "US Dollar" for USD.
     * @draft ICU 2.6
     */
    UCURR_LONG_NAME
} UCurrNameStyle;

/**
 * Register an (existing) ISO 4217 currency code for the given locale.
 * Only the country code and the two variants EURO and PRE_EURO are
 * recognized.
 * @param isoCode the three-letter ISO 4217 currency code
 * @param locale  the locale for which to register this currency code
 * @param status the in/out status code
 * @return a registry key that can be passed to ucurr_unregister to
 * unregister this currency code, or NULL if there was an error.
 * @see ucurr_unregister
 * @draft ICU 2.6
 */
U_CAPI UCurrRegistryKey U_EXPORT2
    ucurr_register(const UChar* isoCode, 
                   const char* locale,  
                   UErrorCode* status);
/**
 * Unregister the previously-registered currency definitions using the
 * UCurrRegistryKey returned from ucurr_register.  The key becomes
 * invalid after a successful call and should not be used again.  Any
 * currency that might have been hidden by the original ucurr_register
 * call is restored.
 * @param key the registry key returned by a previous call to ucurr_register
 * @param status the in/out status code, no special meanings are assigned
 * @return TRUE if the currency for this key was successfully unregistered
 * @see ucurr_register
 * @draft ICU 2.6
 */
U_CAPI UBool U_EXPORT2
    ucurr_unregister(UCurrRegistryKey key, UErrorCode* status);

/**
 * Returns the display name for the given currency in the
 * given locale.  For example, the symbolic display name
 * (UCURR_SYMBOL_NAME) for the USD currency in the en_US locale
 * is "$".
 * @param currency null-terminated 3-letter ISO 4217 code
 * @param locale locale in which to display currency
 * @param nameStyle selector for which kind of name to return
 * @param isChoiceFormat fill-in set to TRUE if the returned value
 * is a ChoiceFormat pattern; otherwise it is a static string
 * @param len fill-in parameter to receive length of result
 * @param ec error code
 * @return pointer to display string of 'len' UChars.  If the resource
 * data contains no entry for 'currency', then 'currency' itself is
 * returned.  If *isChoiceFormat is TRUE, then the result is a
 * ChoiceFormat pattern.  Otherwise it is a static string.
 * @see UCurrNameStyle
 * @draft ICU 2.6
 */
U_CAPI const UChar* U_EXPORT2
ucurr_getName(const UChar* currency,
              const char* locale,
              UCurrNameStyle nameStyle,
              UBool* isChoiceFormat,
              int32_t* len,
              UErrorCode* ec);

/**
 * Returns the number of fraction digits that should be displayed
 * for the given currency.
 * @param currency null-terminated 3-letter ISO 4217 code
 * @return a non-negative number of fraction digits to be
 * displayed
 * @draft ICU 2.2
 */
U_CAPI int32_t U_EXPORT2
ucurr_getDefaultFractionDigits(const UChar* currency);

/**
 * Returns the rounding increment for the given currency, or 0.0 if
 * no rounding is done for the currency.
 * @param currency null-terminated 3-letter ISO 4217 code
 * @return the non-negative rounding increment, or 0.0 if none
 * @see ucurr_getDefaultFractionDigits
 * @draft ICU 2.2
 */
U_CAPI double U_EXPORT2
ucurr_getRoundingIncrement(const UChar* currency);

#endif /* #if !UCONFIG_NO_FORMATTING */

#endif




--- NEW FILE: unirepl.h ---
/*
**********************************************************************
*   Copyright (c) 2002, International Business Machines Corporation
*   and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   01/14/2002  aliu        Creation.
**********************************************************************
*/
#ifndef UNIREPL_H
#define UNIREPL_H

#include "unicode/utypes.h"

U_NAMESPACE_BEGIN

class Replaceable;
class UnicodeString;
class UnicodeSet;

/**
 * <code>UnicodeReplacer</code> is the protocol implemented by objects
 * that substitute output text for a range of characters in a
 * Replaceable string.  Every replacement goes through the Replaceable
 * API, so that out-of-band data is preserved.
 *
 * <p>This is a mixin class.
 * @author Alan Liu
 * @draft ICU 2.4
 */
class U_I18N_API UnicodeReplacer /* not : public UObject because this is an interface/mixin class */ {

 public:

    /**
     * Destructor.  Declared virtual so that an implementation can be
     * destroyed through a UnicodeReplacer pointer.
     * @draft ICU 2.4
     */
    virtual ~UnicodeReplacer() {}

    /**
     * Replaces the characters of 'text' in the range 'start' to
     * 'limit' with the output text of this object, updates 'cursor'
     * with the cursor position, and reports how long the replacement
     * text is.
     *
     * @param text the text to be matched
     * @param start inclusive start index of the text to be replaced
     * @param limit exclusive end index of the text to be replaced;
     * must be greater than or equal to start
     * @param cursor output parameter for the cursor position.
     * Not every replacer object updates it, but in a complete tree
     * of replacer objects, representing the entire output side of a
     * transliteration rule, at least one must update it.
     * @return the number of 16-bit code units in the text that
     * replaces the characters at offsets start..(limit-1) in text
     * @draft ICU 2.4
     */
    virtual int32_t replace(Replaceable& text,
                            int32_t start, int32_t limit,
                            int32_t& cursor) = 0;

    /**
     * Produces a string representation of this replacer.  Passing
     * that string to the appropriate parser, typically
     * TransliteratorParser, yields another replacer that is equal
     * to this one.
     * @param result the string that receives the pattern.  Its
     * previous contents will be deleted.
     * @param escapeUnprintable if TRUE, unprintable characters are
     * converted to their hex escape representations, \\uxxxx or
     * \\Uxxxxxxxx.  Unprintable characters are defined by
     * Utility.isUnprintable().
     * @return a reference to 'result'.
     * @draft ICU 2.4
     */
    virtual UnicodeString& toReplacerPattern(UnicodeString& result,
                                             UBool escapeUnprintable) const = 0;

    /**
     * Unions into the given set all characters that may be output by
     * this object.
     * @param toUnionTo the set into which the output characters are
     * unioned
     * @draft ICU 2.4
     */
    virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const = 0;
};

U_NAMESPACE_END

#endif