[sword-svn] r153 - in trunk: . as_is/borland source source/common source/common/unicode source/config source/data/misc source/i18n source/i18n/unicode source/layout source/layoutex/layout source/test/cintltst source/test/intltest source/test/letest source/test/testdata source/tools/genrb source/tools/toolutil source/tools/tzcode
chrislit at www.crosswire.org
chrislit at www.crosswire.org
Sat May 31 07:31:33 MST 2008
Author: chrislit
Date: 2008-05-31 07:31:31 -0700 (Sat, 31 May 2008)
New Revision: 153
Added:
trunk/source/common/dtintrv.cpp
trunk/source/common/unicode/dtintrv.h
trunk/source/i18n/dtitv_impl.h
trunk/source/i18n/dtitvfmt.cpp
trunk/source/i18n/dtitvinf.cpp
trunk/source/i18n/unicode/dtitvfmt.h
trunk/source/i18n/unicode/dtitvinf.h
trunk/source/test/intltest/dtifmtts.cpp
trunk/source/test/intltest/dtifmtts.h
trunk/source/test/intltest/plurfmts.cpp
trunk/source/test/intltest/plurults.cpp
trunk/source/test/intltest/ssearch.cpp
trunk/source/test/intltest/ssearch.h
trunk/source/test/testdata/ssearch.xml
trunk/source/test/testdata/test1bmp.ucm
Modified:
trunk/as_is/borland/icuin.bpf
trunk/as_is/borland/icuin.bpr
trunk/as_is/borland/icuuc.bpf
trunk/as_is/borland/icuuc.bpr
trunk/readme.html
trunk/source/Makefile.in
trunk/source/common/Makefile.in
trunk/source/common/common.vcproj
trunk/source/common/ubidi.c
trunk/source/common/unicode/utext.h
trunk/source/config/Makefile.inc.in
trunk/source/configure.in
trunk/source/data/misc/zoneinfo.txt
trunk/source/i18n/Makefile.in
trunk/source/i18n/datefmt.cpp
trunk/source/i18n/i18n.vcproj
trunk/source/i18n/search.cpp
trunk/source/i18n/smpdtfmt.cpp
trunk/source/i18n/stsearch.cpp
trunk/source/i18n/ucal.cpp
trunk/source/i18n/ucol.cpp
trunk/source/i18n/ucol_imp.h
trunk/source/i18n/ucoleitr.cpp
trunk/source/i18n/ucurr.cpp
trunk/source/i18n/unicode/coleitr.h
trunk/source/i18n/unicode/coll.h
trunk/source/i18n/unicode/datefmt.h
trunk/source/i18n/unicode/dtptngen.h
trunk/source/i18n/unicode/msgfmt.h
trunk/source/i18n/unicode/rbtz.h
trunk/source/i18n/unicode/smpdtfmt.h
trunk/source/i18n/unicode/tblcoll.h
trunk/source/i18n/unicode/ucal.h
trunk/source/i18n/unicode/ucoleitr.h
trunk/source/i18n/unicode/ucurr.h
trunk/source/i18n/unicode/udat.h
trunk/source/i18n/unicode/usearch.h
trunk/source/i18n/usearch.cpp
trunk/source/i18n/usrchimp.h
trunk/source/icudefs.mk.in
trunk/source/layout/IndicClassTables.cpp
trunk/source/layout/IndicReordering.cpp
trunk/source/layout/IndicReordering.h
trunk/source/layout/OpenTypeLayoutEngine.cpp
trunk/source/layout/ScriptAndLanguage.cpp
trunk/source/layoutex/layout/RunArrays.h
trunk/source/layoutex/layout/plruns.h
trunk/source/test/cintltst/callcoll.c
trunk/source/test/cintltst/callcoll.h
trunk/source/test/cintltst/ccaltst.c
trunk/source/test/cintltst/citertst.c
trunk/source/test/cintltst/usrchdat.c
trunk/source/test/cintltst/usrchtst.c
trunk/source/test/intltest/Makefile.in
trunk/source/test/intltest/intltest.vcproj
trunk/source/test/intltest/itercoll.cpp
trunk/source/test/intltest/itformat.cpp
trunk/source/test/intltest/loctest.cpp
trunk/source/test/intltest/srchtest.cpp
trunk/source/test/intltest/srchtest.h
trunk/source/test/intltest/tscoll.cpp
trunk/source/test/intltest/tscoll.h
trunk/source/test/intltest/tztest.cpp
trunk/source/test/letest/gendata.xml
trunk/source/test/testdata/DataDrivenCollationTest.txt
trunk/source/test/testdata/Makefile.in
trunk/source/test/testdata/letest.xml
trunk/source/tools/genrb/read.c
trunk/source/tools/toolutil/ucbuf.c
trunk/source/tools/toolutil/ucbuf.h
trunk/source/tools/toolutil/xmlparser.cpp
trunk/source/tools/tzcode/icuzones
Log:
ICU 4.0 d02 update
Modified: trunk/as_is/borland/icuin.bpf
===================================================================
--- trunk/as_is/borland/icuin.bpf 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/as_is/borland/icuin.bpf 2008-05-31 14:31:31 UTC (rev 153)
@@ -126,6 +126,8 @@
USEUNIT("..\..\source\i18n\ethpccal.cpp");
USEUNIT("..\..\source\i18n\plurfmt.cpp");
USEUNIT("..\..\source\i18n\plurrule.cpp");
+USEUNIT("..\..\source\i18n\dtitvfmt.cpp");
+USEUNIT("..\..\source\i18n\dtitvinf.cpp");
//---------------------------------------------------------------------------
#define Library
Modified: trunk/as_is/borland/icuin.bpr
===================================================================
--- trunk/as_is/borland/icuin.bpr 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/as_is/borland/icuin.bpr 2008-05-31 14:31:31 UTC (rev 153)
@@ -64,7 +64,8 @@
..\..\source\i18n\zonemeta.obj ..\..\source\i18n\zstrfmt.obj
..\..\source\i18n\brktrans.obj ..\..\source\i18n\cecal.obj
..\..\source\i18n\coptccal.obj ..\..\source\i18n\ethpccal.obj
- ..\..\source\i18n\plurfmt.obj ..\..\source\i18n\plurrule.obj"/>
+ ..\..\source\i18n\plurfmt.obj ..\..\source\i18n\plurrule.obj
+ ..\..\source\i18n\dtitvfmt.obj ..\..\source\i18n\dtitvinf.obj"/>
<RESFILES value=""/>
<IDLFILES value=""/>
<IDLGENFILES value=""/>
Modified: trunk/as_is/borland/icuuc.bpf
===================================================================
--- trunk/as_is/borland/icuuc.bpf 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/as_is/borland/icuuc.bpf 2008-05-31 14:31:31 UTC (rev 153)
@@ -134,6 +134,7 @@
USEUNIT("..\..\source\common\bmpset.cpp");
USEUNIT("..\..\source\common\unisetspan.cpp");
USEUNIT("..\..\source\common\mutex.cpp");
+USEUNIT("..\..\source\common\dtintrv.cpp");
//---------------------------------------------------------------------------
#define Library
Modified: trunk/as_is/borland/icuuc.bpr
===================================================================
--- trunk/as_is/borland/icuuc.bpr 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/as_is/borland/icuuc.bpr 2008-05-31 14:31:31 UTC (rev 153)
@@ -69,7 +69,7 @@
..\..\source\common\triedict.obj ..\..\source\common\util_props.obj
..\..\source\common\wintz.obj ..\..\source\common\umath.obj
..\..\source\common\bmpset.obj ..\..\source\common\unisetspan.obj
- ..\..\source\common\mutex.obj"/>
+ ..\..\source\common\mutex.obj ..\..\source\common\dtintrv.obj"/>
<RESFILES value=""/>
<IDLFILES value=""/>
<IDLGENFILES value=""/>
Modified: trunk/readme.html
===================================================================
--- trunk/readme.html 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/readme.html 2008-05-31 14:31:31 UTC (rev 153)
@@ -30,10 +30,9 @@
<body>
<h1>International Components for Unicode<br />
- <abbr title="International Components for Unicode">ICU</abbr> 3.9.1
- ReadMe</h1>
+ <abbr title="International Components for Unicode">ICU</abbr> 4.0 (draft) ReadMe</h1>
- <p>Version: 2008 May 2<br />
+ <p>Version: 2008 May 28<br />
Copyright © 1997-2008 International Business Machines Corporation and
others. All Rights Reserved.</p>
<!-- Remember that there is a copyright at the end too -->
@@ -214,17 +213,9 @@
<p>The following list concentrates on <em>changes that affect existing
applications migrating from previous ICU releases</em>. For more news about
- this release, see the <a href="http://www.icu-project.org/download/">ICU 3.8
+ this release, see the <a href="http://www.icu-project.org/download/">ICU 4.0
download page</a>.</p>
- <h3><a name="News_timezone" id="News_timezone">Changes to timezone formatting
- and parsing</a></h3>
-
- <p>In ICU 3.8, the behavior of date formatting and parsing has changed
- significantly, perhaps requiring recoding on your part depending on your
- usage. For more information, see <a href="http://icu-project.org/userguide/formatDateTime.html">
- Formatting Dates and Times</a> in the User Guide.</p>
-
<h2><a name="Download" href="#Download" id="Download">How To Download the
Source Code</a></h2>
@@ -1668,7 +1659,7 @@
</ul>
<hr />
- <p>Copyright © 1997-2007 International Business Machines Corporation and
+ <p>Copyright © 1997-2008 International Business Machines Corporation and
others. All Rights Reserved.<br />
IBM Globalization Center of Competency - San José<br />
4400 North First Street<br />
Modified: trunk/source/Makefile.in
===================================================================
--- trunk/source/Makefile.in 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/Makefile.in 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
#******************************************************************************
#
-# Copyright (C) 1998-2007, International Business Machines
+# Copyright (C) 1998-2008, International Business Machines
# Corporation and others. All Rights Reserved.
#
#******************************************************************************
@@ -32,9 +32,10 @@
@SAMPLES_TRUE@SAMPLE = samples
DOXYGEN = @DOXYGEN@
+DOCZIP = icu-docs.zip
## Files to remove for 'make clean'
-CLEANFILES = *~
+CLEANFILES = *~
## Files built (autoconfed) and installed
INSTALLED_BUILT_FILES = $(top_builddir)/config/Makefile.inc $(top_builddir)/config/icu-config @platform_make_fragment@ $(EXTRA_DATA:%=$(DESTDIR)$(pkglibdir)/%)
@@ -85,6 +86,10 @@
Doxyfile: $(srcdir)/Doxyfile.in
CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+$(DOCZIP): doc
+ -$(RMV) $(DOCZIP)
+ ( cd doc/html ; zip -r ../../$(DOCZIP) * )
endif
LOCAL_SUBDIRS = $(SUBDIRS)
@@ -153,7 +158,7 @@
clean-local:
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
- $(RMV) Doxyfile doc
+ $(RMV) Doxyfile doc $(DOCZIP)
distclean-local: clean-local
$(RMV) $(top_builddir)/config/Makefile.inc $(top_builddir)/config/icu-config
Modified: trunk/source/common/Makefile.in
===================================================================
--- trunk/source/common/Makefile.in 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/common/Makefile.in 2008-05-31 14:31:31 UTC (rev 153)
@@ -85,7 +85,7 @@
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
uidna.o usprep.o punycode.o \
-util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o mutex.o
+util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o mutex.o dtintrv.o
## Header files to install
HEADERS = $(srcdir)/unicode/*.h unicode/*.h
Modified: trunk/source/common/common.vcproj
===================================================================
--- trunk/source/common/common.vcproj 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/common/common.vcproj 2008-05-31 14:31:31 UTC (rev 153)
@@ -1367,6 +1367,14 @@
Name="formatting"
>
<File
+ RelativePath=".\dtintrv.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\unicode\dtintrv.h"
+ >
+ </File>
+ <File
RelativePath=".\unicode\parseerr.h"
>
<FileConfiguration
Added: trunk/source/common/dtintrv.cpp
===================================================================
--- trunk/source/common/dtintrv.cpp (rev 0)
+++ trunk/source/common/dtintrv.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -0,0 +1,50 @@
+/*******************************************************************************
+* Copyright (C) 2008, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*
+* File DTINTRV.CPP
+*
+*******************************************************************************
+*/
+
+
+
+#include "unicode/dtintrv.h"
+
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateInterval)
+
+//DateInterval::DateInterval(){}
+
+
+DateInterval::DateInterval(const UDate from, const UDate to)
+: fromDate(from),
+ toDate(to)
+{}
+
+
+DateInterval::~DateInterval(){}
+
+
+DateInterval::DateInterval(const DateInterval& other)
+: UObject(other) {
+ *this = other;
+}
+
+
+DateInterval&
+DateInterval::operator=(const DateInterval& other) {
+ if ( this != &other ) {
+ fromDate = other.fromDate;
+ toDate = other.toDate;
+ }
+ return *this;
+}
+
+
+
+U_NAMESPACE_END
+
Modified: trunk/source/common/ubidi.c
===================================================================
--- trunk/source/common/ubidi.c 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/common/ubidi.c 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,7 +1,7 @@
/*
******************************************************************************
*
-* Copyright (C) 1999-2007, International Business Machines
+* Copyright (C) 1999-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@@ -800,7 +800,7 @@
/* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN */
0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10
};
-enum { _L=0, _R=1, _EN=2, _AN=3, _ON=4, _S=5, _B=6 }; /* reduced dirProp */
+enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
/******************************************************************
@@ -840,24 +840,24 @@
static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
{
/* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , Res */
-/* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , _ON },
-/* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3), _L },
-/* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3), _R },
-/* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 , _R },
-/* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), _EN },
-/* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3), _AN },
-/* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), _AN },
-/* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3), _ON },
-/* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3), _ON },
-/* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), _ON },
-/*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), _EN },
-/*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), _EN },
-/*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3), _AN },
-/*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), _AN },
-/*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3), _ON },
-/*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3), _S },
-/*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3), _S },
-/*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3), _B }
+/* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , DirProp_ON },
+/* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3), DirProp_L },
+/* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3), DirProp_R },
+/* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 , DirProp_R },
+/* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), DirProp_EN },
+/* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3), DirProp_AN },
+/* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), DirProp_AN },
+/* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3), DirProp_ON },
+/* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3), DirProp_ON },
+/* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), DirProp_ON },
+/*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), DirProp_EN },
+/*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), DirProp_EN },
+/*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3), DirProp_AN },
+/*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), DirProp_AN },
+/*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3), DirProp_ON },
+/*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3), DirProp_S },
+/*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3), DirProp_S },
+/*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3), DirProp_B }
};
/* we must undef macro s because the levels table have a different
@@ -884,7 +884,7 @@
Definitions and type for levels state tables
*******************************************************************
*/
-#define IMPTABLEVELS_COLUMNS (_B + 2)
+#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
#define GET_STATE(cell) ((cell)&0x0f)
#define GET_ACTION(cell) ((cell)>>4)
@@ -1238,7 +1238,7 @@
if ((level & 1) && (pLevState->startON > 0)) { /* after ON */
start=pLevState->startON; /* reset to basic run level */
}
- if (_prop == _S) /* add LRM before S */
+ if (_prop == DirProp_S) /* add LRM before S */
{
addPoint(pBiDi, start0, LRM_BEFORE);
pInsertPoints->confirmed=pInsertPoints->size;
@@ -1254,7 +1254,7 @@
/* mark insert points as confirmed */
pInsertPoints->confirmed=pInsertPoints->size;
pLevState->lastStrongRTL=-1;
- if (_prop == _S) /* add LRM before S */
+ if (_prop == DirProp_S) /* add LRM before S */
{
addPoint(pBiDi, start0, LRM_BEFORE);
pInsertPoints->confirmed=pInsertPoints->size;
@@ -1274,7 +1274,7 @@
case 5: /* EN/AN after R/AL + possible cont */
/* check for real AN */
- if ((_prop == _AN) && (NO_CONTEXT_RTL(pBiDi->dirProps[start0]) == AN) &&
+ if ((_prop == DirProp_AN) && (NO_CONTEXT_RTL(pBiDi->dirProps[start0]) == AN) &&
(pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
{
/* real AN */
@@ -1326,7 +1326,7 @@
/* false alert, infirm LRMs around previous AN */
pInsertPoints=&(pBiDi->insertPoints);
pInsertPoints->size=pInsertPoints->confirmed;
- if (_prop == _S) /* add RLM before S */
+ if (_prop == DirProp_S) /* add RLM before S */
{
addPoint(pBiDi, start0, RLM_BEFORE);
pInsertPoints->confirmed=pInsertPoints->size;
@@ -1479,7 +1479,7 @@
break;
case 3: /* process seq1, process seq2, init new seq1 */
processPropertySeq(pBiDi, &levState, resProp, start1, start2);
- processPropertySeq(pBiDi, &levState, _ON, start2, i);
+ processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
start1=i;
break;
case 4: /* process seq1, set seq1=seq2, init new seq2 */
Added: trunk/source/common/unicode/dtintrv.h
===================================================================
--- trunk/source/common/unicode/dtintrv.h (rev 0)
+++ trunk/source/common/unicode/dtintrv.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -0,0 +1,172 @@
+/*
+*******************************************************************************
+* Copyright (C) 2008, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*
+* File DTINTRV.H
+*
+*******************************************************************************
+*/
+
+#ifndef __DTINTRV_H__
+#define __DTINTRV_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Date Interval data type
+ */
+
+
+U_NAMESPACE_BEGIN
+
+
+/**
+ * This class represents a date interval.
+ * It is a pair of UDate values: the from date and the to date of the interval.
+ * @draft ICU 4.0
+**/
+class U_COMMON_API DateInterval : public UObject {
+public:
+
+ /**
+ * Constructor given from date and to date.
+ * @param fromDate The from date in date interval.
+ * @param toDate The to date in date interval.
+ * @draft ICU 4.0
+ */
+ DateInterval(const UDate fromDate, const UDate toDate);
+
+ /**
+ * destructor
+ * @draft ICU 4.0
+ */
+ ~DateInterval();
+
+ /**
+ * Get the from date.
+ * @return the from date in dateInterval.
+ * @draft ICU 4.0
+ */
+ UDate getFromDate() const;
+
+ /**
+ * Get the to date.
+ * @return the to date in dateInterval.
+ * @draft ICU 4.0
+ */
+ UDate getToDate() const;
+
+
+ /**
+ * Return the class ID for this class. This is useful only for comparing to
+ * a return value from getDynamicClassID(). For example:
+ * <pre>
+ * . Base* polymorphic_pointer = createPolymorphicObject();
+ * . if (polymorphic_pointer->getDynamicClassID() ==
+ * . Derived::getStaticClassID()) ...
+ * </pre>
+ * @return The class ID for all objects of this class.
+ * @draft ICU 4.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+ /**
+ * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
+ * method is to implement a simple version of RTTI, since not all C++
+ * compilers support genuine RTTI. Polymorphic operator==() and clone()
+ * methods call this method.
+ *
+ * @return The class ID for this object. All objects of a
+ * given class have the same class ID. Objects of
+ * other classes have different class IDs.
+ * @draft ICU 4.0
+ */
+ virtual UClassID getDynamicClassID(void) const;
+
+
+ /**
+ * Copy constructor.
+ * @draft ICU 4.0
+ */
+ DateInterval(const DateInterval& other);
+
+ /**
+ * Default assignment operator
+ * @draft ICU 4.0
+ */
+ DateInterval& operator=(const DateInterval&);
+
+ /**
+ * Equality operator.
+ * @return TRUE if the two DateIntervals are the same
+ * @draft ICU 4.0
+ */
+ UBool operator==(const DateInterval& other) const;
+
+ /**
+ * Non-equality operator
+ * @return TRUE if the two DateIntervals are not the same
+ * @draft ICU 4.0
+ */
+ UBool operator!=(const DateInterval& other) const;
+
+
+ /**
+ * clone this object.
+ * The caller owns the result and should delete it when done.
+ * @return a cloned DateInterval
+ * @draft ICU 4.0
+ */
+ DateInterval* clone() const;
+
+private:
+ /**
+ * Default constructor, not implemented.
+ * @draft ICU 4.0
+ */
+ DateInterval();
+
+ UDate fromDate;
+ UDate toDate;
+
+} ;// end class DateInterval
+
+
+inline UDate
+DateInterval::getFromDate() const {
+ return fromDate;
+}
+
+
+inline UDate
+DateInterval::getToDate() const {
+ return toDate;
+}
+
+
+inline UBool
+DateInterval::operator==(const DateInterval& other) const {
+ return ( fromDate == other.fromDate && toDate == other.toDate );
+}
+
+
+inline UBool
+DateInterval::operator!=(const DateInterval& other) const {
+ return ( !operator==(other) );
+}
+
+
+inline DateInterval*
+DateInterval::clone() const {
+ return new DateInterval(*this);
+}
+
+
+
+U_NAMESPACE_END
+
+#endif
Modified: trunk/source/common/unicode/utext.h
===================================================================
--- trunk/source/common/unicode/utext.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/common/unicode/utext.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -1192,7 +1192,7 @@
* Do not use, reserved for use by the UText framework only.
* @internal
*/
- int32_t reserved1, reserved2, reserved3;
+ int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3;
/**
@@ -1287,6 +1287,10 @@
UTextClose *spare3;
};
+/**
+ * Function dispatch table for UText
+ * @see UTextFuncs
+ */
typedef struct UTextFuncs UTextFuncs;
/**
Modified: trunk/source/config/Makefile.inc.in
===================================================================
--- trunk/source/config/Makefile.inc.in 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/config/Makefile.inc.in 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
## -*-makefile-*-
#******************************************************************************
-# Copyright (C) 1999-2007, International Business Machines
+# Copyright (C) 1999-2008, International Business Machines
# Corporation and others. All Rights Reserved.
#******************************************************************************
# This Makefile.inc is designed to be included into projects which make use
@@ -30,6 +30,7 @@
libdir = @libdir@
libexecdir = @libexecdir@
bindir = @bindir@
+datarootdir = @datarootdir@
datadir = @datadir@
sbindir = @sbindir@
@@ -141,6 +142,10 @@
GENRB = $(bindir)/genrb
PKGDATA = $(bindir)/pkgdata
+# moved here because of dependencies
+pkgdatadir = $(datadir)/$(PACKAGE)$(ICULIBSUFFIX)/$(VERSION)
+pkglibdir = $(libdir)/$(PACKAGE)$(ICULIBSUFFIX)/$(VERSION)
+
##################################################################
##################################################################
#
@@ -214,8 +219,6 @@
#
##################################################################
-pkgdatadir = $(datadir)/$(PACKAGE)$(ICULIBSUFFIX)/$(VERSION)
-pkglibdir = $(libdir)/$(PACKAGE)$(ICULIBSUFFIX)/$(VERSION)
# The basename of the ICU data file (i.e. icudt21b )
ICUDATA_CHAR = @ICUDATA_CHAR@
Modified: trunk/source/configure.in
===================================================================
--- trunk/source/configure.in 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/configure.in 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
dnl -*-m4-*-
dnl configure.in for ICU
-dnl Copyright (c) 1999-2007, International Business Machines Corporation and
+dnl Copyright (c) 1999-2008, International Business Machines Corporation and
dnl others. All Rights Reserved.
dnl Stephen F. Booth, heavily modified by Yves and others
@@ -425,8 +425,15 @@
fi ;;
i*86-*-solaris*)
if test "$GCC" = yes; then
- dnl We're using gcc, and the simple -a gcc command line works for genccode
- GENCCODE_ASSEMBLY="-a gcc"
+ dnl When using gcc, look if we're also using GNU as.
+ dnl When using GNU as, the simple -a gcc command line works for genccode.
+ asv=`"${CC}" -print-prog-name=as 2>/dev/null`
+ asv=`"${asv}" --version 2>/dev/null`
+ case "X${asv}" in
+ X*GNU*) GENCCODE_ASSEMBLY="-a gcc" ;;
+ X*) GENCCODE_ASSEMBLY="-a sun-x86" ;;
+ esac
+ unset asv
else
GENCCODE_ASSEMBLY="-a sun-x86"
fi ;;
Modified: trunk/source/data/misc/zoneinfo.txt
===================================================================
--- trunk/source/data/misc/zoneinfo.txt 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/data/misc/zoneinfo.txt 2008-05-31 14:31:31 UTC (rev 153)
@@ -3,17 +3,17 @@
// Corporation and others. All Rights Reserved.
//---------------------------------------------------------
// Build tool: tz2icu
-// Build date: Tue Mar 25 14:30:03 2008
+// Build date: Tue May 27 11:55:54 2008
// Olson source: ftp://elsie.nci.nih.gov/pub/
-// Olson version: 2008b
-// ICU version: 3.9.2
+// Olson version: 2008c
+// ICU version: 4.0
//---------------------------------------------------------
// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<
// >> !!! >>> DO NOT EDIT <<< !!! <<
//---------------------------------------------------------
zoneinfo:table(nofallback) {
- TZVersion { "2008b" }
+ TZVersion { "2008c" }
Zones:array {
/* ACT */ :int { 328 } //Z#0
, /* AET */ :int { 340 } //Z#1
@@ -93,9 +93,9 @@
:intvector { 3, 18, 374 }
} //Z#18
, /* Africa/Casablanca */ :array {
- :intvector { -2147483648, -1773012580, -956361600, -950490000, -942019200, -761187600, -617241600, -605149200, -81432000, -71110800, 141264000, 147222000, 199756800, 207702000, 231292800, 244249200, 265507200, 271033200, 448243200, 504918000 }
+ :intvector { -2147483648, -1773012580, -956361600, -950490000, -942019200, -761187600, -617241600, -605149200, -81432000, -71110800, 141264000, 147222000, 199756800, 207702000, 231292800, 244249200, 265507200, 271033200, 448243200, 504918000, 1212278400, 1222556400 }
:intvector { -1820, 0, 0, 0, 0, 3600, 3600, 0 }
- :bin { "0001020102010201020102010201020102010301" }
+ :bin { "00010201020102010201020102010201020103010201" }
} //Z#19
, /* Africa/Ceuta */ :array {
:intvector { -2147483648, -1630112400, -1616810400, -1442451600, -1427677200, -1379293200, -1364778000, -1348448400, -1333328400, -1316394000, -1301274000, -1293840000, -81432000, -71110800, 141264000, 147222000, 199756800, 207702000, 231292800, 244249200, 265507200, 271033200, 448243200, 512528400, 528253200, 543978000, 559702800, 575427600, 591152400, 606877200, 622602000, 638326800, 654656400, 670381200, 686106000, 701830800, 717555600, 733280400, 749005200, 764730000, 780454800, 796179600, 811904400, 828234000, 846378000 }
@@ -1297,9 +1297,9 @@
} //Z#235
, /* Asia/Calcutta */ :int { 262 } //Z#236
, /* Asia/Choibalsan */ :array {
- :intvector { -2147483648, -2032933080, 252435600, 417974400, 433778400, 449593200, 465314400, 481042800, 496764000, 512492400, 528213600, 543942000, 559663200, 575391600, 591112800, 606841200, 622562400, 638290800, 654616800, 670345200, 686066400, 701794800, 717516000, 733244400, 748965600, 764694000, 780415200, 796143600, 811864800, 828198000, 843919200, 859647600, 875368800, 891097200, 906818400, 988390800, 1001692800, 1017421200, 1033142400, 1048870800, 1064592000, 1080320400, 1096041600, 1111770000, 1127491200, 1143219600, 1159545600 }
+ :intvector { -2147483648, -2032933080, 252435600, 417974400, 433778400, 449593200, 465314400, 481042800, 496764000, 512492400, 528213600, 543942000, 559663200, 575391600, 591112800, 606841200, 622562400, 638290800, 654616800, 670345200, 686066400, 701794800, 717516000, 733244400, 748965600, 764694000, 780415200, 796143600, 811864800, 828198000, 843919200, 859647600, 875368800, 891097200, 906818400, 988390800, 1001692800, 1017421200, 1033142400, 1048870800, 1064592000, 1080320400, 1096041600, 1111770000, 1127491200, 1143219600, 1159545600, 1206889200 }
:intvector { 25200, 0, 27480, 0, 28800, 0, 32400, 0, 32400, 3600 }
- :bin { "0100020403040304030403040304030403040304030403040304030403040304030403040304030403040304030403" }
+ :bin { "010002040304030403040304030403040304030403040304030403040304030403040304030403040304030403040302" }
} //Z#237
, /* Asia/Chongqing */ :array {
:intvector { -2147483648, -1325487980, 325962000, 515520000, 527007600, 545155200, 558457200, 576604800, 589906800, 608659200, 621961200, 640108800, 653410800, 671558400, 684860400 }
@@ -1408,9 +1408,9 @@
:intvector { 43200, 1997 }
} //Z#258
, /* Asia/Karachi */ :array {
- :intvector { -2147483648, -1988166492, -862637400, -764145000, -576135000, 38775600, 1018119660, 1033840860 }
+ :intvector { -2147483648, -1988166492, -862637400, -764145000, -576135000, 38775600, 1018119660, 1033840860, 1212260400, 1220205600 }
:intvector { 16092, 0, 18000, 0, 18000, 3600, 19800, 0, 19800, 3600 }
- :bin { "0003040301010201" }
+ :bin { "00030403010102010201" }
:intvector { 259, 516 }
} //Z#259
, /* Asia/Kashgar */ :array {
Modified: trunk/source/i18n/Makefile.in
===================================================================
--- trunk/source/i18n/Makefile.in 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/Makefile.in 2008-05-31 14:31:31 UTC (rev 153)
@@ -80,7 +80,7 @@
ulocdata.o measfmt.o currfmt.o curramt.o currunit.o measure.o utmscale.o \
csdetect.o csmatch.o csr2022.o csrecog.o csrmbcs.o csrsbcs.o csrucode.o csrutf8.o inputext.o \
windtfmt.o winnmfmt.o basictz.o dtrule.o rbtz.o tzrule.o tztrans.o vtzone.o \
-zonemeta.o zstrfmt.o plurrule.o plurfmt.o
+zonemeta.o zstrfmt.o plurrule.o plurfmt.o dtitvfmt.o dtitvinf.o
## Header files to install
HEADERS = $(srcdir)/unicode/*.h
Modified: trunk/source/i18n/datefmt.cpp
===================================================================
--- trunk/source/i18n/datefmt.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/datefmt.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -24,6 +24,7 @@
#include "unicode/ures.h"
#include "unicode/datefmt.h"
#include "unicode/smpdtfmt.h"
+#include "unicode/dtptngen.h"
#include "reldtfmt.h"
#include "cstring.h"
@@ -275,7 +276,35 @@
}
//----------------------------------------------------------------------
+/*
+ * Builds a SimpleDateFormat for `locale` from the pattern that the locale's
+ * DateTimePatternGenerator considers the best match for `skeleton`.
+ * There is no status parameter: every failure is reported by returning NULL.
+ * The caller owns the returned formatter.
+ */
+DateFormat* U_EXPORT2
+DateFormat::createPatternInstance(const UnicodeString& skeleton,
+                                  const Locale& locale)
+{
+    // Errors are tracked locally only; they never reach the caller.
+    UErrorCode ec = U_ZERO_ERROR;
+    DateTimePatternGenerator* generator =
+        DateTimePatternGenerator::createInstance(locale, ec);
+    if ( generator == NULL || U_FAILURE(ec) ) {
+        delete generator;
+        return NULL;
+    }
+
+    // The generator is only needed to resolve the skeleton.
+    const UnicodeString bestPattern = generator->getBestPattern(skeleton, ec);
+    delete generator;
+    if ( U_FAILURE(ec) ) {
+        return NULL;
+    }
+
+    SimpleDateFormat* formatter = new SimpleDateFormat(bestPattern, locale, ec);
+    if ( U_SUCCESS(ec) ) {
+        return formatter;
+    }
+    delete formatter;
+    return NULL;
+}
+
+//----------------------------------------------------------------------
+
DateFormat* U_EXPORT2
DateFormat::create(EStyle timeStyle, EStyle dateStyle, const Locale& locale)
{
Added: trunk/source/i18n/dtitv_impl.h
===================================================================
--- trunk/source/i18n/dtitv_impl.h (rev 0)
+++ trunk/source/i18n/dtitv_impl.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -0,0 +1,93 @@
+/*
+*******************************************************************************
+* Copyright (C) 2007-2008, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*
+* File DTITV_IMPL.H
+*
+*******************************************************************************
+*/
+
+
+#ifndef DTITV_IMPL_H__
+#define DTITV_IMPL_H__
+
+/**
+ * \file
+ * \brief C++ API: Defines macros for interval format implementation
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/unistr.h"
+
+
+// Named UTF-16 code units used by the date interval format implementation
+// when building and scanning skeletons and interval patterns.
+
+// Punctuation and separators.
+#define QUOTE ((UChar)0x0027)
+#define LOW_LINE ((UChar)0x005F)
+#define COLON ((UChar)0x003A)
+#define LEFT_CURLY_BRACKET ((UChar)0x007B)
+#define RIGHT_CURLY_BRACKET ((UChar)0x007D)
+#define SPACE ((UChar)0x0020)
+#define EN_DASH ((UChar)0x2013)
+
+// Digits.
+#define DIGIT_ZERO ((UChar)0x0030)
+#define DIGIT_ONE ((UChar)0x0031)
+
+// Lower-case ASCII letters (no LOW_X is defined).
+#define LOW_A ((UChar)0x0061)
+#define LOW_B ((UChar)0x0062)
+#define LOW_C ((UChar)0x0063)
+#define LOW_D ((UChar)0x0064)
+#define LOW_E ((UChar)0x0065)
+#define LOW_F ((UChar)0x0066)
+#define LOW_G ((UChar)0x0067)
+#define LOW_H ((UChar)0x0068)
+#define LOW_I ((UChar)0x0069)
+#define LOW_J ((UChar)0x006A)
+#define LOW_K ((UChar)0x006B)
+#define LOW_L ((UChar)0x006C)
+#define LOW_M ((UChar)0x006D)
+#define LOW_N ((UChar)0x006E)
+#define LOW_O ((UChar)0x006F)
+#define LOW_P ((UChar)0x0070)
+#define LOW_Q ((UChar)0x0071)
+#define LOW_R ((UChar)0x0072)
+#define LOW_S ((UChar)0x0073)
+#define LOW_T ((UChar)0x0074)
+#define LOW_U ((UChar)0x0075)
+#define LOW_V ((UChar)0x0076)
+#define LOW_W ((UChar)0x0077)
+#define LOW_Y ((UChar)0x0079)
+#define LOW_Z ((UChar)0x007A)
+
+// Upper-case ASCII letters (only a subset of the alphabet is defined).
+#define CAP_A ((UChar)0x0041)
+#define CAP_C ((UChar)0x0043)
+#define CAP_D ((UChar)0x0044)
+#define CAP_E ((UChar)0x0045)
+#define CAP_F ((UChar)0x0046)
+#define CAP_G ((UChar)0x0047)
+#define CAP_H ((UChar)0x0048)
+#define CAP_K ((UChar)0x004B)
+#define CAP_L ((UChar)0x004C)
+#define CAP_M ((UChar)0x004D)
+#define CAP_O ((UChar)0x004F)
+#define CAP_Q ((UChar)0x0051)
+#define CAP_S ((UChar)0x0053)
+#define CAP_T ((UChar)0x0054)
+#define CAP_V ((UChar)0x0056)
+#define CAP_W ((UChar)0x0057)
+#define CAP_Y ((UChar)0x0059)
+#define CAP_Z ((UChar)0x005A)
+
+//#define MINIMUM_SUPPORTED_CALENDAR_FIELD UCAL_MINUTE
+
+// Upper bounds on how many 'E' / 'M' letters are kept in a normalized skeleton.
+#define MAX_E_COUNT 5
+#define MAX_M_COUNT 5
+//#define MAX_INTERVAL_INDEX 4
+// No trailing semicolon: the macro must expand to a plain integer constant so
+// that it can be used inside expressions as well as in initializers.
+// NOTE(review): presumably a "larger than any real value" sentinel; confirm
+// the intended magnitude against its users.
+#define MAX_POSITIVE_INT 56632
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif
+//eof
Added: trunk/source/i18n/dtitvfmt.cpp
===================================================================
--- trunk/source/i18n/dtitvfmt.cpp (rev 0)
+++ trunk/source/i18n/dtitvfmt.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -0,0 +1,1417 @@
+/*******************************************************************************
+* Copyright (C) 2008, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*
+* File DTITVFMT.CPP
+*
+*******************************************************************************
+*/
+
+
+//FIXME: put in compilation
+//#define DTITVFMT_DEBUG 1
+
+#include "unicode/msgfmt.h"
+#include "unicode/dtptngen.h"
+#include "unicode/dtitvfmt.h"
+#include "unicode/dtitvinf.h"
+#include "unicode/calendar.h"
+#include "dtitv_impl.h"
+
+#ifdef DTITVFMT_DEBUG
+#include <iostream>
+#include "cstring.h"
+#endif
+
+
+#if !UCONFIG_NO_FORMATTING
+
+U_NAMESPACE_BEGIN
+
+
+
+#ifdef DTITVFMT_DEBUG
+#define PRINTMESG(msg) { std::cout << "(" << __FILE__ << ":" << __LINE__ << ") " << msg << "\n"; }
+#endif
+
+
+// Date skeletons, one 0-terminated row per entry. The longest row holds
+// 10 letters plus the terminator, hence the inner dimension of 11.
+// NOTE(review): the rows look like they are meant to be indexed by a
+// DateFormat style constant (this file indexes with DateFormat::kShort and
+// expects "yMd") -- confirm against the DateFormat style enum values.
+static const UChar gDateFormatSkeleton[][11] = {
+//yMMMMEEEEd
+{LOW_Y, CAP_M, CAP_M, CAP_M, CAP_M, CAP_E, CAP_E, CAP_E, CAP_E, LOW_D, 0},
+//yMMMMd
+{LOW_Y, CAP_M, CAP_M, CAP_M, CAP_M, LOW_D, 0},
+//yMMMd
+{LOW_Y, CAP_M, CAP_M, CAP_M, LOW_D, 0},
+//yMd
+{LOW_Y, CAP_M, LOW_D, 0} };
+
+
+// Resource key under which the calendar data stores its date/time patterns.
+static const char gDateTimePatternsTag[]="DateTimePatterns";
+
+
+// Prefix "latestFirst:" that an interval pattern may carry.
+// The array ends with a 0 terminator, so sizeof(array)/sizeof(array[0])
+// is one more than the number of visible characters.
+static const UChar gLaterFirstPrefix[] = {LOW_L, LOW_A, LOW_T, LOW_E, LOW_S,LOW_T, CAP_F, LOW_I, LOW_R, LOW_S, LOW_T, COLON, 0};
+
+// Prefix "earliestFirst:" that an interval pattern may carry.
+// Also 0-terminated; the same element-count caveat applies.
+static const UChar gEarlierFirstPrefix[] = {LOW_E, LOW_A, LOW_R, LOW_L, LOW_I, LOW_E, LOW_S, LOW_T, CAP_F, LOW_I, LOW_R, LOW_S, LOW_T, COLON, 0};
+
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateIntervalFormat)
+
+
+
+// Convenience overload: creates the formatter for the default locale.
+DateIntervalFormat* U_EXPORT2
+DateIntervalFormat::createInstance(const UnicodeString& skeleton,
+                                   UErrorCode& status) {
+    const Locale& defaultLocale = Locale::getDefault();
+    return createInstance(skeleton, defaultLocale, status);
+}
+
+
+/*
+ * Creates an interval formatter for `skeleton` in `locale`, using the
+ * locale's own DateIntervalInfo.
+ * Returns NULL when `status` is already a failure on entry or when creation
+ * fails. The date format and interval info built here are handed to
+ * create(), which is given both pointers even when one of them is NULL.
+ */
+DateIntervalFormat* U_EXPORT2
+DateIntervalFormat::createInstance(const UnicodeString& skeleton,
+                                   const Locale& locale,
+                                   UErrorCode& status) {
+    if ( U_FAILURE(status) ) {
+        return NULL;
+    }
+    DateFormat* dtfmt = DateFormat::createPatternInstance(skeleton, locale);
+
+#ifdef DTITVFMT_DEBUG
+    // createPatternInstance() returns NULL on failure, so only dump the
+    // pattern when a formatter actually exists.
+    if ( dtfmt != NULL ) {
+        char result[1000];
+        char result_1[1000];
+        char mesg[2000];
+        skeleton.extract(0, skeleton.length(), result, "UTF-8");
+        UnicodeString pat;
+        ((SimpleDateFormat*)dtfmt)->toPattern(pat);
+        pat.extract(0, pat.length(), result_1, "UTF-8");
+        sprintf(mesg, "skeleton: %s; pattern: %s\n", result, result_1);
+        PRINTMESG(mesg)
+    }
+#endif
+
+    DateIntervalInfo* dtitvinf = new DateIntervalInfo(locale, status);
+    return create(dtfmt, dtitvinf, &skeleton, status);
+}
+
+
+
+// Convenience overload: default locale, caller-supplied interval info.
+// `dtitvinf` is passed straight through to the locale-taking overload.
+DateIntervalFormat* U_EXPORT2
+DateIntervalFormat::createInstance(const UnicodeString& skeleton,
+                                   DateIntervalInfo* dtitvinf,
+                                   UErrorCode& status) {
+    const Locale& defaultLocale = Locale::getDefault();
+    return createInstance(skeleton, defaultLocale, dtitvinf, status);
+}
+
+
+/*
+ * Creates an interval formatter for `skeleton` in `locale` with a
+ * caller-supplied DateIntervalInfo.
+ * `dtitvinf` is deleted here when `status` is already a failure on entry;
+ * otherwise it is handed to create() together with the new date format.
+ */
+DateIntervalFormat* U_EXPORT2
+DateIntervalFormat::createInstance(const UnicodeString& skeleton,
+                                   const Locale& locale,
+                                   DateIntervalInfo* dtitvinf,
+                                   UErrorCode& status) {
+    if ( U_SUCCESS(status) ) {
+        DateFormat* dateFormat =
+            DateFormat::createPatternInstance(skeleton, locale);
+        return create(dateFormat, dtitvinf, &skeleton, status);
+    }
+    // Incoming failure: nothing is built, but the info object was handed over.
+    delete dtitvinf;
+    return NULL;
+}
+
+
+// Default constructor: owns nothing yet, all owned pointers start out null.
+DateIntervalFormat::DateIntervalFormat()
+:   fInfo(0),
+    fDateFormat(0),
+    fFromCalendar(0),
+    fToCalendar(0)
+{
+}
+
+
+// Copy constructor: starts from null pointers and lets operator= do the
+// deep copy of everything the source owns.
+DateIntervalFormat::DateIntervalFormat(const DateIntervalFormat& itvfmt)
+:   Format(itvfmt),
+    fInfo(0),
+    fDateFormat(0),
+    fFromCalendar(0),
+    fToCalendar(0)
+{
+    operator=(itvfmt);
+}
+
+
+/*
+ * Deep-copy assignment: releases the objects currently owned, then clones
+ * the date format, interval info and both calendars of `itvfmt` (a null
+ * pointer in the source stays null) and copies the skeleton and every
+ * interval pattern. Self-assignment is a no-op.
+ */
+DateIntervalFormat&
+DateIntervalFormat::operator=(const DateIntervalFormat& itvfmt) {
+    if ( this == &itvfmt ) {
+        return *this;
+    }
+
+    // Drop what we own before taking copies of the source's objects.
+    delete fDateFormat;
+    delete fInfo;
+    delete fFromCalendar;
+    delete fToCalendar;
+
+    fDateFormat = itvfmt.fDateFormat
+        ? (SimpleDateFormat*)itvfmt.fDateFormat->clone()
+        : NULL;
+    fInfo = itvfmt.fInfo ? itvfmt.fInfo->clone() : NULL;
+    fFromCalendar = itvfmt.fFromCalendar ? itvfmt.fFromCalendar->clone() : NULL;
+    fToCalendar = itvfmt.fToCalendar ? itvfmt.fToCalendar->clone() : NULL;
+
+    fSkeleton = itvfmt.fSkeleton;
+    for ( int8_t idx = 0; idx < DateIntervalInfo::kIPI_MAX_INDEX; ++idx ) {
+        fIntervalPatterns[idx] = itvfmt.fIntervalPatterns[idx];
+    }
+    return *this;
+}
+
+
+// Destructor: releases the interval info, the date format and the two
+// working calendars. Deleting a null pointer is a no-op, so partially
+// initialized objects are handled as well.
+DateIntervalFormat::~DateIntervalFormat() {
+ delete fInfo;
+ delete fDateFormat;
+ delete fFromCalendar;
+ delete fToCalendar;
+}
+
+
+// Polymorphic copy: returns a new heap-allocated copy made with the copy
+// constructor; the caller owns it.
+Format*
+DateIntervalFormat::clone(void) const {
+    DateIntervalFormat* copy = new DateIntervalFormat(*this);
+    return copy;
+}
+
+
+/*
+ * Equality test.
+ * Returns TRUE when `other` is this very object, or when it is a
+ * DateIntervalFormat whose base Format state, interval info, date format,
+ * calendars (via isEquivalentTo), skeleton and every interval pattern
+ * compare equal. Returns FALSE for any other Format subclass.
+ *
+ * A formatter that is missing its info, date format or either calendar is
+ * never equal to a different object; this also keeps the comparison from
+ * dereferencing a null pointer on either side.
+ */
+UBool
+DateIntervalFormat::operator==(const Format& other) const {
+    if ( other.getDynamicClassID() != DateIntervalFormat::getStaticClassID() ) {
+        return FALSE;
+    }
+    const DateIntervalFormat* fmt = (const DateIntervalFormat*)&other;
+    if ( this == fmt ) {
+        return TRUE;
+    }
+    if ( !Format::operator==(other) ) {
+        return FALSE;
+    }
+
+    // Both sides must be fully populated before anything is dereferenced.
+    if ( fInfo == NULL || fmt->fInfo == NULL ||
+         fDateFormat == NULL || fmt->fDateFormat == NULL ||
+         fFromCalendar == NULL || fmt->fFromCalendar == NULL ||
+         fToCalendar == NULL || fmt->fToCalendar == NULL ) {
+        return FALSE;
+    }
+
+    if ( !( *fInfo == *fmt->fInfo ) ||
+         !( *fDateFormat == *fmt->fDateFormat ) ||
+         !fFromCalendar->isEquivalentTo(*fmt->fFromCalendar) ||
+         !fToCalendar->isEquivalentTo(*fmt->fToCalendar) ||
+         !( fSkeleton == fmt->fSkeleton ) ) {
+        return FALSE;
+    }
+
+    // Finally compare the per-field interval patterns.
+    for ( int8_t i = 0; i < DateIntervalInfo::kIPI_MAX_INDEX; ++i ) {
+        const PatternInfo& mine = fIntervalPatterns[i];
+        const PatternInfo& theirs = fmt->fIntervalPatterns[i];
+        if ( !( mine.firstPart == theirs.firstPart ) ||
+             !( mine.secondPart == theirs.secondPart ) ||
+             mine.laterDateFirst != theirs.laterDateFirst ) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+
+
+
+/*
+ * Format entry point for a generic Formattable.
+ * Only a Formattable of type kObject that wraps a DateInterval is accepted;
+ * it is forwarded to the DateInterval overload. Anything else sets
+ * U_ILLEGAL_ARGUMENT_ERROR. `appendTo` is returned in every case.
+ */
+UnicodeString&
+DateIntervalFormat::format(const Formattable& obj,
+                           UnicodeString& appendTo,
+                           FieldPosition& fieldPosition,
+                           UErrorCode& status) const {
+    if ( U_FAILURE(status) ) {
+        return appendTo;
+    }
+
+    if ( obj.getType() != Formattable::kObject ) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return appendTo;
+    }
+
+    const UObject* formatObj = obj.getObject();
+    if ( formatObj->getDynamicClassID() != DateInterval::getStaticClassID() ) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return appendTo;
+    }
+    return format((DateInterval*)formatObj, appendTo, fieldPosition, status);
+}
+
+
+/*
+ * Formats a DateInterval by loading its two end points into the internal
+ * from/to calendars and delegating to the calendar-based overload.
+ * When the formatter is not fully set up (missing calendars, date format or
+ * interval info) nothing is appended and `status` is left unchanged.
+ */
+UnicodeString&
+DateIntervalFormat::format(const DateInterval* dtInterval,
+                           UnicodeString& appendTo,
+                           FieldPosition& fieldPosition,
+                           UErrorCode& status) const {
+    if ( U_FAILURE(status) ) {
+        return appendTo;
+    }
+
+    const UBool ready = ( fFromCalendar != NULL && fToCalendar != NULL &&
+                          fDateFormat != NULL && fInfo != NULL );
+    if ( !ready ) {
+        return appendTo;
+    }
+
+    fFromCalendar->setTime(dtInterval->getFromDate(), status);
+    fToCalendar->setTime(dtInterval->getToDate(), status);
+    if ( U_FAILURE(status) ) {
+        return appendTo;
+    }
+    return format(*fFromCalendar, *fToCalendar, appendTo,fieldPosition, status);
+}
+
+
+/*
+ * Formats the interval between two calendars and appends it to `appendTo`.
+ *
+ * Steps:
+ *  1. Reject calendars that are not equivalent (U_ILLEGAL_ARGUMENT_ERROR).
+ *  2. Find the largest field that differs, checked in the order
+ *     ERA, YEAR, MONTH, DATE, AM_PM, HOUR, MINUTE.
+ *  3. If none of them differs, format `fromCalendar` as a single date.
+ *  4. Otherwise look up the interval pattern stored for that field and
+ *     either format with its two parts, or use the fall-back format.
+ *
+ * NOTE(review): the pattern of the shared fDateFormat is swapped
+ * temporarily and restored afterwards, although the method is const.
+ * Concurrent calls on one instance would presumably interfere -- confirm.
+ *
+ * @return `appendTo`
+ */
+UnicodeString&
+DateIntervalFormat::format(Calendar& fromCalendar,
+ Calendar& toCalendar,
+ UnicodeString& appendTo,
+ FieldPosition& pos,
+ UErrorCode& status) const {
+ if ( U_FAILURE(status) ) {
+ return appendTo;
+ }
+
+ // not support different calendar types and time zones
+ //if ( fromCalendar.getType() != toCalendar.getType() ) {
+ if ( !fromCalendar.isEquivalentTo(toCalendar) ) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return appendTo;
+ }
+
+ // First, find the largest different calendar field.
+ // UCAL_FIELD_COUNT serves as the "no difference found" marker.
+ UCalendarDateFields field = UCAL_FIELD_COUNT;
+
+ if ( fromCalendar.get(UCAL_ERA,status) != toCalendar.get(UCAL_ERA,status)) {
+ field = UCAL_ERA;
+ } else if ( fromCalendar.get(UCAL_YEAR, status) !=
+ toCalendar.get(UCAL_YEAR, status) ) {
+ field = UCAL_YEAR;
+ } else if ( fromCalendar.get(UCAL_MONTH, status) !=
+ toCalendar.get(UCAL_MONTH, status) ) {
+ field = UCAL_MONTH;
+ } else if ( fromCalendar.get(UCAL_DATE, status) !=
+ toCalendar.get(UCAL_DATE, status) ) {
+ field = UCAL_DATE;
+ } else if ( fromCalendar.get(UCAL_AM_PM, status) !=
+ toCalendar.get(UCAL_AM_PM, status) ) {
+ field = UCAL_AM_PM;
+ } else if ( fromCalendar.get(UCAL_HOUR, status) !=
+ toCalendar.get(UCAL_HOUR, status) ) {
+ field = UCAL_HOUR;
+ } else if ( fromCalendar.get(UCAL_MINUTE, status) !=
+ toCalendar.get(UCAL_MINUTE, status) ) {
+ field = UCAL_MINUTE;
+ }
+
+ // Any failure from the Calendar::get() calls above is handled here.
+ if ( U_FAILURE(status) ) {
+ return appendTo;
+ }
+ if ( field == UCAL_FIELD_COUNT ) {
+ /* ignore the second/millisecond etc. small fields' difference.
+ * use single date when all the above are the same.
+ */
+ return fDateFormat->format(fromCalendar, appendTo, pos);
+ }
+
+ // following will not set wrong status
+ // (field is always one of the seven fields tested above at this point)
+ int32_t itvPtnIndex = DateIntervalInfo::calendarFieldToIntervalIndex(field,
+ status);
+ const PatternInfo& intervalPattern = fIntervalPatterns[itvPtnIndex];
+
+ // No pattern at all is stored for this field.
+ if ( intervalPattern.firstPart.isEmpty() &&
+ intervalPattern.secondPart.isEmpty() ) {
+ if ( fDateFormat->isFieldUnitIgnored(field) ) {
+ /* the largest different calendar field is smaller than
+ * the smallest calendar field in pattern,
+ * return single date format.
+ */
+ return fDateFormat->format(fromCalendar, appendTo, pos);
+ }
+ return fallbackFormat(fromCalendar, toCalendar, appendTo, pos, status);
+ }
+ // If the first part in interval pattern is empty,
+ // the 2nd part of it saves the full-pattern used in fall-back.
+ // For a 'real' interval pattern, the first part will never be empty.
+ if ( intervalPattern.firstPart.isEmpty() ) {
+ // fall back
+ // Apply the stored full pattern for the duration of the fall-back
+ // format, then restore the formatter's own pattern.
+ UnicodeString originalPattern;
+ fDateFormat->toPattern(originalPattern);
+ fDateFormat->applyPattern(intervalPattern.secondPart);
+ appendTo = fallbackFormat(fromCalendar, toCalendar, appendTo, pos, status);
+ fDateFormat->applyPattern(originalPattern);
+ return appendTo;
+ }
+ // Decide which end point is printed first.
+ Calendar* firstCal;
+ Calendar* secondCal;
+ if ( intervalPattern.laterDateFirst ) {
+ firstCal = &toCalendar;
+ secondCal = &fromCalendar;
+ } else {
+ firstCal = &fromCalendar;
+ secondCal = &toCalendar;
+ }
+ // break the interval pattern into 2 parts,
+ // first part should not be empty,
+ // Each part is applied to the date format in turn; the original pattern
+ // is restored once both parts have been written.
+ UnicodeString originalPattern;
+ fDateFormat->toPattern(originalPattern);
+ fDateFormat->applyPattern(intervalPattern.firstPart);
+ fDateFormat->format(*firstCal, appendTo, pos);
+ if ( !intervalPattern.secondPart.isEmpty() ) {
+ fDateFormat->applyPattern(intervalPattern.secondPart);
+ fDateFormat->format(*secondCal, appendTo, pos);
+ }
+ fDateFormat->applyPattern(originalPattern);
+ return appendTo;
+}
+
+
+
+/*
+ * Parsing of date intervals is not implemented: the body is empty, so
+ * none of the three arguments is read or modified.
+ */
+void
+DateIntervalFormat::parseObject(const UnicodeString& /* source */,
+ Formattable& /* result */,
+ ParsePosition& /* parse_pos */) const {
+ // FIXME: this signature has no error code parameter,
+ // so there is nowhere to report a "not supported" error.
+}
+
+
+
+
+/*
+ * Replaces the date format with a copy of `newDateFormat`, rebuilds the
+ * from/to working calendars from the copy's calendar and, when interval
+ * info is present, re-initializes the interval patterns.
+ *
+ * @param newDateFormat must be a SimpleDateFormat, otherwise
+ *                      U_ILLEGAL_ARGUMENT_ERROR is set.
+ * @param status        in/out error code; U_MEMORY_ALLOCATION_ERROR when the
+ *                      copy cannot be allocated.
+ *
+ * On either error this object is left unchanged.
+ */
+void
+DateIntervalFormat::setDateFormat(const DateFormat& newDateFormat,
+                                  UErrorCode& status) {
+    if ( U_FAILURE(status) ) {
+        return;
+    }
+    if ( newDateFormat.getDynamicClassID() != SimpleDateFormat::getStaticClassID() ) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // Copy before releasing anything: `newDateFormat` may be the formatter
+    // this object currently owns, and a failed allocation must not leave
+    // fDateFormat null for initializePattern() to dereference.
+    SimpleDateFormat* copy =
+        new SimpleDateFormat((const SimpleDateFormat&)newDateFormat);
+    if ( copy == NULL ) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    delete fDateFormat;
+    delete fFromCalendar;
+    delete fToCalendar;
+    fDateFormat = copy;
+    if ( fDateFormat->getCalendar() ) {
+        fFromCalendar = fDateFormat->getCalendar()->clone();
+        fToCalendar = fDateFormat->getCalendar()->clone();
+    } else {
+        fFromCalendar = NULL;
+        fToCalendar = NULL;
+    }
+    if ( fInfo ) {
+        initializePattern(status);
+    }
+}
+
+
+/*
+ * Takes ownership of `newDateFormat` as the new date format, rebuilds the
+ * from/to working calendars from its calendar and, when interval info is
+ * present, re-initializes the interval patterns.
+ *
+ * @param newDateFormat must be a non-null SimpleDateFormat, otherwise
+ *                      U_ILLEGAL_ARGUMENT_ERROR is set.
+ * @param status        in/out error code.
+ *
+ * When the function returns early (incoming failure, null or wrong type)
+ * the object is NOT adopted and this formatter is left unchanged.
+ */
+void
+DateIntervalFormat::adoptDateFormat(DateFormat* newDateFormat,
+                                    UErrorCode& status) {
+    if ( U_FAILURE(status) ) {
+        return;
+    }
+    if ( newDateFormat == NULL ||
+         newDateFormat->getDynamicClassID() != SimpleDateFormat::getStaticClassID() ) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // Re-adopting the formatter we already own must not delete it.
+    if ( newDateFormat != fDateFormat ) {
+        delete fDateFormat;
+        fDateFormat = (SimpleDateFormat*)newDateFormat;
+    }
+    delete fFromCalendar;
+    delete fToCalendar;
+    if ( fDateFormat->getCalendar() ) {
+        fFromCalendar = fDateFormat->getCalendar()->clone();
+        fToCalendar = fDateFormat->getCalendar()->clone();
+    } else {
+        fFromCalendar = NULL;
+        fToCalendar = NULL;
+    }
+    if ( fInfo ) {
+        initializePattern(status);
+    }
+}
+
+
+/*
+ * Constructs a formatter from an adopted date format and adopted interval
+ * info, without an explicit skeleton (it is derived from the date format's
+ * pattern during initializePattern()).
+ *
+ * Both pointers are owned by this object afterwards; on failure they are
+ * deleted here. A null `dtfmt` or `dtItvInfo` sets
+ * U_MEMORY_ALLOCATION_ERROR.
+ *
+ * Writing `DateIntervalFormat(dtfmt, dtItvInfo, NULL, status);` in the body
+ * would only create and destroy an unnamed temporary, leaving this object's
+ * members uninitialized, so the set-up is done directly here.
+ */
+DateIntervalFormat::DateIntervalFormat(DateFormat* dtfmt,
+                                       DateIntervalInfo* dtItvInfo,
+                                       UErrorCode& status)
+:   fInfo(NULL),
+    fDateFormat(NULL),
+    fFromCalendar(NULL),
+    fToCalendar(NULL)
+{
+    if ( U_FAILURE(status) ) {
+        delete dtfmt;
+        delete dtItvInfo;
+        return;
+    }
+    if ( dtfmt == NULL || dtItvInfo == NULL ) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        // safe to delete NULL
+        delete dtfmt;
+        delete dtItvInfo;
+        return;
+    }
+    // fSkeleton stays empty; initializePattern() fills it in.
+    fInfo = dtItvInfo;
+    fDateFormat = (SimpleDateFormat*)dtfmt;
+    if ( dtfmt->getCalendar() ) {
+        fFromCalendar = dtfmt->getCalendar()->clone();
+        fToCalendar = dtfmt->getCalendar()->clone();
+    }
+    initializePattern(status);
+}
+
+
+
+/*
+ * Constructs a formatter from an adopted date format, adopted interval info
+ * and an optional skeleton.
+ *
+ * On an incoming failure, or when either pointer is null (which sets
+ * U_MEMORY_ALLOCATION_ERROR), both objects are deleted and the formatter is
+ * left empty. Otherwise the objects are stored, the working calendars are
+ * cloned from the date format's calendar and the interval patterns are
+ * initialized.
+ */
+DateIntervalFormat::DateIntervalFormat(DateFormat* dtfmt,
+                                       DateIntervalInfo* dtItvInfo,
+                                       const UnicodeString* skeleton,
+                                       UErrorCode& status)
+:   fInfo(NULL),
+    fDateFormat(NULL),
+    fFromCalendar(NULL),
+    fToCalendar(NULL)
+{
+    // A missing input object is reported as an allocation failure.
+    if ( U_SUCCESS(status) && ( dtfmt == NULL || dtItvInfo == NULL ) ) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    if ( U_FAILURE(status) ) {
+        // Both objects were handed over; deleting NULL is harmless.
+        delete dtfmt;
+        delete dtItvInfo;
+        return;
+    }
+
+    if ( skeleton != NULL ) {
+        fSkeleton = *skeleton;
+    }
+    fInfo = dtItvInfo;
+    fDateFormat = (SimpleDateFormat*)dtfmt;
+
+    // The calendar pointers are already NULL from the initializer list.
+    const Calendar* source = dtfmt->getCalendar();
+    if ( source ) {
+        fFromCalendar = source->clone();
+        fToCalendar = source->clone();
+    }
+    initializePattern(status);
+}
+
+
+
+// Factory without a skeleton: forwards to the four-argument create() with a
+// null skeleton pointer.
+DateIntervalFormat* U_EXPORT2
+DateIntervalFormat::create(DateFormat* dtfmt,
+                           DateIntervalInfo* dtItvInfo,
+                           UErrorCode& status) {
+    const UnicodeString* noSkeleton = NULL;
+    return create(dtfmt, dtItvInfo, noSkeleton, status);
+}
+
+
+
+/*
+ * Factory: builds a DateIntervalFormat from the given parts.
+ * Returns NULL on failure. If the formatter object itself cannot be
+ * allocated, `dtfmt` and `dtitvinf` are deleted here; if construction
+ * reports a failure, the half-built formatter is deleted instead.
+ */
+DateIntervalFormat* U_EXPORT2
+DateIntervalFormat::create(DateFormat* dtfmt,
+                           DateIntervalInfo* dtitvinf,
+                           const UnicodeString* skeleton,
+                           UErrorCode& status) {
+    DateIntervalFormat* formatter =
+        new DateIntervalFormat(dtfmt, dtitvinf, skeleton, status);
+
+    if ( formatter == NULL ) {
+        // The constructor never ran, so the parts are still ours to free.
+        status = U_MEMORY_ALLOCATION_ERROR;
+        delete dtfmt;
+        delete dtitvinf;
+        return NULL;
+    }
+    if ( U_FAILURE(status) ) {
+        // safe to delete, although nothing actually is saved
+        delete formatter;
+        return NULL;
+    }
+    return formatter;
+}
+
+
+
+/**
+ * Initialize interval patterns local to this formatter
+ *
+ * This code is a bit complicated since
+ * 1. the interval patterns saved in resource bundle files are interval
+ *    patterns based on date or time only.
+ *    It does not have interval patterns based on both date and time.
+ *    Interval patterns on both date and time are algorithm generated.
+ *
+ *    For example, it has interval patterns on skeleton "dMy" and "hm",
+ *    but it does not have interval patterns on skeleton "dMyhm".
+ *
+ *    The rules to generate interval patterns for a date and time skeleton are
+ *    1) when the year, month, or day differs, concatenate the two original
+ *       expressions with a separator between,
+ *       For example, interval pattern from "Jan 10, 2007 10:10 am"
+ *       to "Jan 11, 2007 10:10am" is
+ *       "Jan 10, 2007 10:10 am - Jan 11, 2007 10:10am"
+ *
+ *    2) otherwise, present the date followed by the range expression
+ *       for the time.
+ *       For example, interval pattern from "Jan 10, 2007 10:10 am"
+ *       to "Jan 10, 2007 11:10am" is
+ *       "Jan 10, 2007 10:10 am - 11:10am"
+ *
+ * 2. even if a pattern does not request a certain calendar field,
+ *    the interval pattern needs to include such field if such fields are
+ *    different between 2 dates.
+ *    For example, a pattern/skeleton is "hm", but the interval pattern
+ *    includes year, month, and date when year, month, and date differs.
+ *
+ * Expects fDateFormat and fInfo to be non-null. Every temporary object
+ * created here (pattern generator, calendar data) is released on all
+ * return paths.
+ *
+ * @param status output param set to success/failure code on exit
+ * @draft ICU 4.0
+ */
+void
+DateIntervalFormat::initializePattern(UErrorCode& status) {
+    // FIXME: WHY can not getLocale()
+    const Locale& locale = fDateFormat->getSmpFmtLocale();
+    DateTimePatternGenerator* dtpng = DateTimePatternGenerator::createInstance(locale, status);
+    if ( U_FAILURE(status) ) {
+        delete dtpng;
+        return;
+    }
+    if ( dtpng == NULL ) {
+        // No generator but no error reported either: treat as out of memory.
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    if ( fSkeleton.isEmpty() ) {
+        // No skeleton was supplied when this formatter was created;
+        // derive one from the date format's current pattern.
+        UnicodeString fullPattern;
+        fDateFormat->toPattern(fullPattern);
+#ifdef DTITVFMT_DEBUG
+        char result[1000];
+        char result_1[1000];
+        char mesg[2000];
+        fSkeleton.extract(0, fSkeleton.length(), result, "UTF-8");
+        sprintf(mesg, "in getBestSkeleton: fSkeleton: %s; \n", result);
+        PRINTMESG(mesg)
+#endif
+        fSkeleton = dtpng->getSkeleton(fullPattern, status);
+        if ( U_FAILURE(status) ) {
+            delete dtpng;
+            return;
+        }
+    }
+
+    // initialize the fIntervalPattern ordering
+    int8_t i;
+    for ( i = 0; i < DateIntervalInfo::kIPI_MAX_INDEX; ++i ) {
+        fIntervalPatterns[i].laterDateFirst = fInfo->getDefaultOrder();
+    }
+
+    /* Check whether the skeleton is a combination of date and time.
+     * For the complication reason 1 explained above.
+     */
+    UnicodeString dateSkeleton;
+    UnicodeString timeSkeleton;
+    UnicodeString normalizedTimeSkeleton;
+    UnicodeString normalizedDateSkeleton;
+
+
+    /* the difference between time skeleton and normalizedTimeSkeleton are:
+     * 1. both 'H' and 'h' are normalized as 'h' in normalized time skeleton,
+     * 2. 'a' is omitted in normalized time skeleton.
+     * 3. there is only one appearance for 'h', 'm','v', 'z' in normalized
+     *    time skeleton
+     *
+     * The difference between date skeleton and normalizedDateSkeleton are:
+     * 1. both 'y' and 'd' appear only once in normalizeDateSkeleton
+     * 2. 'E' and 'EE' are normalized into 'EEE'
+     * 3. 'MM' is normalized into 'M'
+     */
+    getDateTimeSkeleton(fSkeleton, dateSkeleton, normalizedDateSkeleton,
+                        timeSkeleton, normalizedTimeSkeleton);
+
+#ifdef DTITVFMT_DEBUG
+    char result[1000];
+    char result_1[1000];
+    char mesg[2000];
+    fSkeleton.extract(0, fSkeleton.length(), result, "UTF-8");
+    sprintf(mesg, "in getBestSkeleton: fSkeleton: %s; \n", result);
+    PRINTMESG(mesg)
+#endif
+
+
+    UBool found = setSeparateDateTimePtn(normalizedDateSkeleton,
+                                         normalizedTimeSkeleton);
+
+    if ( found == false ) {
+        // use fallback
+        // TODO: if user asks "m"(minute), but "d"(day) differ
+        if ( timeSkeleton.length() != 0 ) {
+            if ( dateSkeleton.length() == 0 ) {
+                // prefix with yMd
+                timeSkeleton.insert(0, gDateFormatSkeleton[DateFormat::kShort]);
+                UnicodeString pattern =dtpng->getBestPattern(timeSkeleton, status);
+                if ( U_FAILURE(status) ) {
+                    delete dtpng;
+                    return;
+                }
+                // for fall back interval patterns,
+                // the first part of the pattern is empty,
+                // the second part of the pattern is the full-pattern
+                // should be used in fall-back.
+                setPatternInfo(UCAL_DATE, NULL, &pattern, fInfo->getDefaultOrder());
+                setPatternInfo(UCAL_MONTH, NULL, &pattern, fInfo->getDefaultOrder());
+                setPatternInfo(UCAL_YEAR, NULL, &pattern, fInfo->getDefaultOrder());
+            } else {
+                // TODO: fall back
+            }
+        } else {
+            // TODO: fall back
+        }
+        delete dtpng;
+        return;
+    } // end of skeleton not found
+    // interval patterns for skeleton are found in resource
+    if ( timeSkeleton.length() == 0 ) {
+        // done
+    } else if ( dateSkeleton.length() == 0 ) {
+        // prefix with yMd
+        timeSkeleton.insert(0, gDateFormatSkeleton[DateFormat::kShort]);
+        UnicodeString pattern =dtpng->getBestPattern(timeSkeleton, status);
+        if ( U_FAILURE(status) ) {
+            delete dtpng;
+            return;
+        }
+        // for fall back interval patterns,
+        // the first part of the pattern is empty,
+        // the second part of the pattern is the full-pattern
+        // should be used in fall-back.
+        setPatternInfo(UCAL_DATE, NULL, &pattern, fInfo->getDefaultOrder());
+        setPatternInfo(UCAL_MONTH, NULL, &pattern, fInfo->getDefaultOrder());
+        setPatternInfo(UCAL_YEAR, NULL, &pattern, fInfo->getDefaultOrder());
+    } else {
+        /* if both present,
+         * 1) when the year, month, or day differs,
+         *    concatenate the two original expressions with a separator between,
+         * 2) otherwise, present the date followed by the
+         *    range expression for the time.
+         */
+        /*
+         * 1) when the year, month, or day differs,
+         *    concatenate the two original expressions with a separator between,
+         */
+        // if field exists, use fall back
+        UnicodeString skeleton = fSkeleton;
+        if ( !fieldExistsInSkeleton(UCAL_DATE, dateSkeleton) ) {
+            // prefix skeleton with 'd'
+            skeleton.insert(0, LOW_D);
+            setFallbackPattern(UCAL_DATE, skeleton, dtpng, status);
+        }
+        if ( !fieldExistsInSkeleton(UCAL_MONTH, dateSkeleton) ) {
+            // then prefix skeleton with 'M'
+            skeleton.insert(0, CAP_M);
+            setFallbackPattern(UCAL_MONTH, skeleton, dtpng, status);
+        }
+        if ( !fieldExistsInSkeleton(UCAL_YEAR, dateSkeleton) ) {
+            // then prefix skeleton with 'y'
+            skeleton.insert(0, LOW_Y);
+            setFallbackPattern(UCAL_YEAR, skeleton, dtpng, status);
+        }
+
+        /*
+         * 2) otherwise, present the date followed by the
+         *    range expression for the time.
+         */
+        // Need the Date/Time pattern for concatenating the date with
+        // the time interval.
+        // The date/time pattern ( such as {0} {1} ) is saved in
+        // calendar, that is why need to get the CalendarData here.
+        CalendarData* calData = new CalendarData(locale, NULL, status);
+
+        if ( calData == NULL ) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            delete dtpng;
+            return;
+        }
+
+        const UResourceBundle* dateTimePatternsRes = calData->getByKey(
+                                           gDateTimePatternsTag, status);
+        int32_t dateTimeFormatLength;
+        const UChar* dateTimeFormat = ures_getStringByIndex(
+                                            dateTimePatternsRes,
+                                            (int32_t)DateFormat::kDateTime,
+                                            &dateTimeFormatLength, &status);
+        if ( U_FAILURE(status) ) {
+            // Release the calendar data as well, not only the generator.
+            delete calData;
+            delete dtpng;
+            return;
+        }
+
+        UnicodeString datePattern = dtpng->getBestPattern(dateSkeleton, status);
+
+        concatSingleDate2TimeInterval(dateTimeFormat, dateTimeFormatLength,
+                                      datePattern, UCAL_AM_PM, status);
+        concatSingleDate2TimeInterval(dateTimeFormat, dateTimeFormatLength,
+                                      datePattern, UCAL_HOUR, status);
+        concatSingleDate2TimeInterval(dateTimeFormat, dateTimeFormatLength,
+                                      datePattern, UCAL_MINUTE, status);
+        delete calData;
+    }
+    delete dtpng;
+}
+
+
+
+/*
+ * Splits `skeleton` into its date letters and its time letters and builds a
+ * normalized form of each.
+ *
+ * All four output strings are appended to, never cleared.
+ *   - dateSkeleton / timeSkeleton receive the recognized letters unchanged,
+ *     in input order.
+ *   - normalizedDateSkeleton receives the less common date letters as they
+ *     occur, followed by at most one 'y', then 'M' (one letter when fewer
+ *     than three were seen, otherwise the count capped at MAX_M_COUNT),
+ *     then 'E' (three letters when up to three were seen, otherwise the
+ *     count capped at MAX_E_COUNT), then at most one 'd'.
+ *   - normalizedTimeSkeleton receives the less common time letters as they
+ *     occur, followed by at most one each of 'h' (for 'h' or 'H'), 'm',
+ *     'z' and 'v'. The letter 'a' is left out of it.
+ * Letters not listed in the switch are ignored.
+ */
+void U_EXPORT2
+DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton,
+                                        UnicodeString& dateSkeleton,
+                                        UnicodeString& normalizedDateSkeleton,
+                                        UnicodeString& timeSkeleton,
+                                        UnicodeString& normalizedTimeSkeleton) {
+    // dateSkeleton follows the sequence of y*M*E*d*
+    // timeSkeleton follows the sequence of hm*[v|z]?
+    int32_t nCapE = 0;
+    int32_t nLowD = 0;
+    int32_t nCapM = 0;
+    int32_t nLowY = 0;
+    int32_t nHour = 0;   // 'h' and 'H' together
+    int32_t nLowM = 0;
+    int32_t nLowV = 0;
+    int32_t nLowZ = 0;
+
+    for ( int32_t pos = 0; pos < skeleton.length(); ++pos ) {
+        const UChar letter = skeleton.charAt(pos);
+        switch ( letter ) {
+          // counted date letters
+          case CAP_E:
+            ++nCapE;
+            dateSkeleton.append(letter);
+            break;
+          case LOW_D:
+            ++nLowD;
+            dateSkeleton.append(letter);
+            break;
+          case CAP_M:
+            ++nCapM;
+            dateSkeleton.append(letter);
+            break;
+          case LOW_Y:
+            ++nLowY;
+            dateSkeleton.append(letter);
+            break;
+          // other date letters are copied to both date outputs as-is
+          case CAP_G:
+          case CAP_Y:
+          case LOW_U:
+          case CAP_Q:
+          case LOW_Q:
+          case CAP_L:
+          case LOW_L:
+          case CAP_W:
+          case LOW_W:
+          case CAP_D:
+          case CAP_F:
+          case LOW_G:
+          case LOW_E:
+          case LOW_C:
+            normalizedDateSkeleton.append(letter);
+            dateSkeleton.append(letter);
+            break;
+          // 'a' is implicitly handled: kept only in the plain time skeleton
+          case LOW_A:
+            timeSkeleton.append(letter);
+            break;
+          // counted time letters
+          case LOW_H:
+          case CAP_H:
+            ++nHour;
+            timeSkeleton.append(letter);
+            break;
+          case LOW_M:
+            ++nLowM;
+            timeSkeleton.append(letter);
+            break;
+          case LOW_Z:
+            ++nLowZ;
+            timeSkeleton.append(letter);
+            break;
+          case LOW_V:
+            ++nLowV;
+            timeSkeleton.append(letter);
+            break;
+          // FIXME: what is the difference between CAP_V/Z and LOW_V/Z
+          // other time letters are copied to both time outputs as-is
+          case CAP_V:
+          case CAP_Z:
+          case LOW_K:
+          case CAP_K:
+          case LOW_J:
+          case LOW_S:
+          case CAP_S:
+          case CAP_A:
+            timeSkeleton.append(letter);
+            normalizedTimeSkeleton.append(letter);
+            break;
+          default:
+            break;
+        }
+    }
+
+    /* normalized date form: y, M..., E..., d */
+    if ( nLowY != 0 ) {
+        normalizedDateSkeleton.append(LOW_Y);
+    }
+    if ( nCapM != 0 ) {
+        int32_t width = 1;
+        if ( nCapM >= 3 ) {
+            width = ( nCapM < MAX_M_COUNT ) ? nCapM : MAX_M_COUNT;
+        }
+        for ( int32_t k = 0; k < width; ++k ) {
+            normalizedDateSkeleton.append(CAP_M);
+        }
+    }
+    if ( nCapE != 0 ) {
+        int32_t width = 3;
+        if ( nCapE > 3 ) {
+            width = ( nCapE < MAX_E_COUNT ) ? nCapE : MAX_E_COUNT;
+        }
+        for ( int32_t k = 0; k < width; ++k ) {
+            normalizedDateSkeleton.append(CAP_E);
+        }
+    }
+    if ( nLowD != 0 ) {
+        normalizedDateSkeleton.append(LOW_D);
+    }
+
+    /* normalized time form: h, m, z, v */
+    if ( nHour != 0 ) {
+        normalizedTimeSkeleton.append(LOW_H);
+    }
+    if ( nLowM != 0 ) {
+        normalizedTimeSkeleton.append(LOW_M);
+    }
+    if ( nLowZ != 0 ) {
+        normalizedTimeSkeleton.append(LOW_Z);
+    }
+    if ( nLowV != 0 ) {
+        normalizedTimeSkeleton.append(LOW_V);
+    }
+}
+
+
+/**
+ * Generate date or time interval patterns from the resource data
+ * and store them as the interval patterns local to this formatter.
+ *
+ * Two things need handling:
+ * 1. Field widths may need adjusting.
+ *    For example, the interval patterns saved in DateIntervalInfo
+ *    include "dMMMy", but not "dMMMMy", so the patterns for "dMMMMy"
+ *    are derived from those for "dMMMy".
+ *    Likewise, DateIntervalInfo includes "hmv" but not "hmz", so the
+ *    patterns for "hmz" are derived from those for "hmv".
+ *
+ * 2. There might be no pattern for a differing 'y' under skeleton "Md";
+ *    to get one, it is looked up under skeleton "yMd".
+ *
+ * When a time skeleton is present only the time-field patterns
+ * (minute, hour, am/pm) are set; date-field differences are then left to
+ * the fall-back pattern.
+ *
+ * @param dateSkeleton   normalized date skeleton
+ * @param timeSkeleton   normalized time skeleton
+ * @return               whether the resource is found for the skeleton.
+ *                       TRUE if interval pattern found for the skeleton,
+ *                       FALSE otherwise.
+ * @draft ICU 4.0
+ */
+UBool
+DateIntervalFormat::setSeparateDateTimePtn(
+                                 const UnicodeString& dateSkeleton,
+                                 const UnicodeString& timeSkeleton) {
+    const UBool hasTime = ( timeSkeleton.length() != 0 );
+    const UnicodeString* skeleton = hasTime ? &timeSkeleton : &dateSkeleton;
+
+    /* Step 1 of deriving patterns for e.g. "dMMMMy": find the closest
+     * skeleton that the resource data does define (e.g. "dMMMy").
+     * differenceInfo then tells how the two relate:
+     *    0  the best matched skeleton is the same as the input skeleton
+     *    1  the fields are the same, but field widths differ
+     *    2  the only difference between fields is v/z
+     *   -1  other fields differ
+     */
+    int8_t differenceInfo = 0;
+    const UnicodeString* bestSkeleton = fInfo->getBestSkeleton(*skeleton,
+                                                               differenceInfo);
+    if ( differenceInfo == -1 ) {
+        // skeleton has different fields, not only v/z difference
+        return FALSE;
+    }
+
+    if ( hasTime ) {
+        setIntervalPattern(UCAL_MINUTE, skeleton, bestSkeleton, differenceInfo);
+        setIntervalPattern(UCAL_HOUR, skeleton, bestSkeleton, differenceInfo);
+        setIntervalPattern(UCAL_AM_PM, skeleton, bestSkeleton, differenceInfo);
+        return TRUE;
+    }
+
+    // only has date skeleton
+    UnicodeString extendedSkeleton;
+    UnicodeString extendedBestSkeleton;
+    setIntervalPattern(UCAL_DATE, skeleton, bestSkeleton, differenceInfo,
+                       &extendedSkeleton, &extendedBestSkeleton);
+
+    const UBool extended = setIntervalPattern(UCAL_MONTH, skeleton,
+                                              bestSkeleton, differenceInfo,
+                                              &extendedSkeleton,
+                                              &extendedBestSkeleton);
+    if ( extended ) {
+        // the month lookup widened the skeletons; use them for the year
+        bestSkeleton = &extendedBestSkeleton;
+        skeleton = &extendedSkeleton;
+    }
+    setIntervalPattern(UCAL_YEAR, skeleton, bestSkeleton, differenceInfo,
+                       &extendedSkeleton, &extendedBestSkeleton);
+    return TRUE;
+}
+
+
+
+/*
+ * Stores a fall-back pattern for `field`: the best full pattern the
+ * generator finds for `skeleton` is saved as the second part of the
+ * field's interval pattern, with the formatter's default ordering.
+ * Does nothing when `status` is, or becomes, a failure.
+ */
+void
+DateIntervalFormat::setFallbackPattern(UCalendarDateFields field,
+                                       const UnicodeString& skeleton,
+                                       DateTimePatternGenerator* dtpng,
+                                       UErrorCode& status) {
+    if ( U_SUCCESS(status) ) {
+        UnicodeString fullPattern = dtpng->getBestPattern(skeleton, status);
+        if ( U_SUCCESS(status) ) {
+            setPatternInfo(field, NULL, &fullPattern, fInfo->getDefaultOrder());
+        }
+    }
+}
+
+
+
+
+/*
+ * Writes the interval pattern entry for `field`.
+ * A null `firstPart` or `secondPart` leaves the corresponding stored part
+ * as it was; `laterDateFirst` is always overwritten.
+ *
+ * For fall-back interval patterns the first part is empty and the second
+ * part holds the full pattern that should be used in fall-back.
+ */
+void
+DateIntervalFormat::setPatternInfo(UCalendarDateFields field,
+                                   const UnicodeString* firstPart,
+                                   const UnicodeString* secondPart,
+                                   UBool laterDateFirst) {
+    // The lookup needs a status argument; it is not reported any further.
+    UErrorCode ec = U_ZERO_ERROR;
+    const int32_t slot =
+        DateIntervalInfo::calendarFieldToIntervalIndex(field, ec);
+
+    PatternInfo& entry = fIntervalPatterns[slot];
+    if ( firstPart != NULL ) {
+        entry.firstPart = *firstPart;
+    }
+    if ( secondPart != NULL ) {
+        entry.secondPart = *secondPart;
+    }
+    entry.laterDateFirst = laterDateFirst;
+}
+
+void
+DateIntervalFormat::setIntervalPattern(UCalendarDateFields field,
+                                       const UnicodeString& intervalPattern) {
+    // Convenience overload: use the default date order reported by fInfo.
+    setIntervalPattern(field, intervalPattern, fInfo->getDefaultOrder());
+}
+
+
+void
+DateIntervalFormat::setIntervalPattern(UCalendarDateFields field,
+                                       const UnicodeString& intervalPattern,
+                                       UBool laterDateFirst) {
+    // An interval pattern may begin with a "latestFirst:" or
+    // "earliestFirst:" prefix.  Such a prefix overrides the order passed
+    // in by the caller and is removed before the pattern is split.
+    const int8_t laterPrefixLen =
+        sizeof(gLaterFirstPrefix)/sizeof(gLaterFirstPrefix[0]);
+    const int8_t earlierPrefixLen =
+        sizeof(gEarlierFirstPrefix)/sizeof(gEarlierFirstPrefix[0]);
+
+    UBool order = laterDateFirst;
+    int8_t skip = 0;     // number of leading prefix code units to drop
+    if ( intervalPattern.startsWith(gLaterFirstPrefix, laterPrefixLen) ) {
+        order = true;
+        skip = laterPrefixLen;
+    } else if ( intervalPattern.startsWith(gEarlierFirstPrefix,
+                                           earlierPrefixLen) ) {
+        order = false;
+        skip = earlierPrefixLen;
+    }
+
+    UnicodeString stripped;
+    const UnicodeString* pattern = &intervalPattern;
+    if ( skip > 0 ) {
+        intervalPattern.extract(skip, intervalPattern.length() - skip,
+                                stripped);
+        pattern = &stripped;
+    }
+
+    // Cut the pattern at the first repeated pattern letter: everything
+    // before it belongs to the first date, the rest to the second date.
+    const int32_t cut = splitPatternInto2Part(*pattern);
+
+    UnicodeString head;
+    UnicodeString tail;
+    pattern->extract(0, cut, head);
+    if ( cut < pattern->length() ) {
+        pattern->extract(cut, pattern->length() - cut, tail);
+    }
+    setPatternInfo(field, &head, &tail, order);
+}
+
+
+
+
+/**
+ * Generate the interval pattern for one calendar field from the
+ * interval patterns held by fInfo.
+ *
+ * It not only saves the interval pattern,
+ * but can also return the extended skeleton and its best match skeleton.
+ *
+ * @param field largest different calendar field
+ * @param skeleton skeleton
+ * @param bestSkeleton the best match skeleton which has interval pattern
+ * defined in resource
+ * @param differenceInfo the difference between skeleton and best skeleton
+ * 0 means the best matched skeleton is the same as input skeleton
+ * 1 means the fields are the same, but field widths are different
+ * 2 means the only difference between fields is v/z,
+ * -1 means there are other field differences
+ *
+ * @param extendedSkeleton extended skeleton; may be NULL, in which case
+ * no extension is attempted
+ * @param extendedBestSkeleton extended best match skeleton; written
+ * whenever extendedSkeleton is written
+ * @return whether the interval pattern is found
+ * through extending skeleton or not.
+ * TRUE if interval pattern is found by
+ * extending skeleton, FALSE otherwise.
+ * NOTE(review): TRUE is returned whenever a pattern was set
+ * and *extendedSkeleton is non-empty, which includes the
+ * case where an earlier call filled *extendedSkeleton and
+ * this call found its pattern directly - confirm intended.
+ * @draft ICU 4.0
+ */
+UBool
+DateIntervalFormat::setIntervalPattern(UCalendarDateFields field,
+ const UnicodeString* skeleton,
+ const UnicodeString* bestSkeleton,
+ int8_t differenceInfo,
+ UnicodeString* extendedSkeleton,
+ UnicodeString* extendedBestSkeleton) {
+ // local status: errors from the lookups below are not reported to the caller
+ UErrorCode status = U_ZERO_ERROR;
+ // following getIntervalPattern() should not generate error status
+ const UnicodeString* pattern = fInfo->getIntervalPattern(*bestSkeleton,
+ field, status);
+ if ( pattern == NULL ) {
+ // single date
+ if ( SimpleDateFormat::isFieldUnitIgnored(*bestSkeleton, field) ) {
+ // do nothing, format will handle it
+ return false;
+ }
+
+ // for 24 hour system, interval patterns in resource file
+ // might not include pattern when am_pm differ,
+ // which should be the same as hour differ.
+ // add it here for simplicity
+ if ( field == UCAL_AM_PM ) {
+ pattern = fInfo->getIntervalPattern(*bestSkeleton,
+ UCAL_HOUR,
+ status);
+ if ( pattern != NULL ) {
+ // the hour pattern is stored as-is; no field width adjustment here
+ setIntervalPattern(field, *pattern);
+ }
+ // this path never reports "extended", whether or not a pattern was set
+ return false;
+ }
+ // else, looking for pattern when 'y' differ for 'dMMMM' skeleton,
+ // first, get best match pattern "MMMd",
+ // since there is no pattern for 'y' differs for skeleton 'MMMd',
+ // need to look for it from skeleton 'yMMMd',
+ // if found, adjust field width in interval pattern from
+ // "MMM" to "MMMM".
+ UChar fieldLetter = fgCalendarFieldToPatternLetter[field];
+ if ( extendedSkeleton ) {
+ // both outputs are overwritten even if the lookup below finds nothing;
+ // extendedBestSkeleton is dereferenced without its own NULL check
+ *extendedSkeleton = *skeleton;
+ *extendedBestSkeleton = *bestSkeleton;
+ extendedSkeleton->insert(0, fieldLetter);
+ extendedBestSkeleton->insert(0, fieldLetter);
+ pattern = fInfo->getIntervalPattern(*extendedBestSkeleton, field, status);
+ }
+ }
+ if ( pattern != NULL ) {
+ if ( differenceInfo != 0 ) {
+ // widths are adjusted against the original (not the extended) skeletons
+ UnicodeString adjustIntervalPattern;
+ adjustFieldWidth(*skeleton, *bestSkeleton, *pattern, differenceInfo,
+ adjustIntervalPattern);
+ setIntervalPattern(field, adjustIntervalPattern);
+ } else {
+ setIntervalPattern(field, *pattern);
+ }
+ if ( extendedSkeleton && !extendedSkeleton->isEmpty() ) {
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+
+
+// Find the index at which an interval pattern splits into the part for
+// the first date and the part for the second date.  The split point is
+// the start of the first run of an unquoted pattern letter (A-Z, a-z)
+// whose letter has already been seen in an earlier run.
+// Returns that index, or the pattern length when no letter repeats.
+int32_t U_EXPORT2
+DateIntervalFormat::splitPatternInto2Part(const UnicodeString& intervalPattern) {
+ UBool inQuote = false;
+ UChar prevCh = 0;
+ // length of the current run of the letter held in prevCh
+ int32_t count = 0;
+
+ /* patternRepeated records whether a pattern letter has already been seen.
+ A letter belongs to the first calendar the first time it is seen,
+ otherwise it belongs to the second calendar.
+ The table has 58 slots, one per code unit from 'A' (0x41) to 'z' (0x7A),
+ indexed by (letter - PATTERN_CHAR_BASE); the column headings below are
+ only approximate because the slots between 'Z' and 'a' are included.
+ */
+ UBool patternRepeated[] =
+ {
+ // A B C D E F G H I J K L M N O
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // P Q R S T U V W X Y Z
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // a b c d e f g h i j k l m n o
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // p q r s t u v w x y z
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+
+ // 0x41 is 'A', the letter stored in slot 0
+ int8_t PATTERN_CHAR_BASE = 0x41;
+
+ /* loop through the pattern string character by character looking for
+ * the first repeated pattern letter, which breaks the interval pattern
+ * into 2 parts.
+ */
+ int32_t i;
+ UBool foundRepetition = false;
+ for (i = 0; i < intervalPattern.length(); ++i) {
+ UChar ch = intervalPattern.charAt(i);
+
+ if (ch != prevCh && count > 0) {
+ // a run of prevCh has just ended: check whether that letter was seen before
+ UBool repeated = patternRepeated[(int)(prevCh - PATTERN_CHAR_BASE)];
+ if ( repeated == FALSE ) {
+ patternRepeated[prevCh - PATTERN_CHAR_BASE] = TRUE;
+ } else {
+ // count is left untouched so that (i - count) below is the run's start
+ foundRepetition = true;
+ break;
+ }
+ count = 0;
+ }
+ if (ch == '\'') {
+ // Consecutive single quotes are a single quote literal,
+ // either outside of quotes or between quotes
+ if ((i+1) < intervalPattern.length() &&
+ intervalPattern.charAt(i+1) == '\'') {
+ ++i;
+ } else {
+ inQuote = ! inQuote;
+ }
+ }
+ else if (!inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/)
+ || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) {
+ // ch is a date-time pattern character
+ prevCh = ch;
+ ++count;
+ }
+ }
+ // check last pattern char, distinguish
+ // "dd MM" ( no repetition ),
+ // "d-d"(last char repeated ), and
+ // "d-d MM" ( repetition found )
+ if ( count > 0 && foundRepetition == FALSE ) {
+ if ( patternRepeated[(int)(prevCh - PATTERN_CHAR_BASE)] == FALSE ) {
+ // the trailing run is a first occurrence: split at the end of the pattern
+ count = 0;
+ }
+ }
+ return (i - count);
+}
+
+
+
+// Format an interval when no interval pattern applies: both calendars
+// are formatted on their own with fDateFormat and the two results are
+// combined through the fall-back interval pattern held by fInfo.
+//
+// @param fromCalendar calendar holding the first date
+// @param toCalendar   calendar holding the second date
+// @param appendTo     receives the formatted interval (appended)
+// @param pos          passed to both single-date format() calls
+// @param status       in/out; nothing is appended when it holds a
+//                     failure on entry or when formatting fails.  Set to
+//                     U_MEMORY_ALLOCATION_ERROR if a string cannot be
+//                     allocated.
+// @return appendTo
+UnicodeString&
+DateIntervalFormat::fallbackFormat(Calendar& fromCalendar,
+                                   Calendar& toCalendar,
+                                   UnicodeString& appendTo,
+                                   FieldPosition& pos,
+                                   UErrorCode& status) const {
+    if ( U_FAILURE(status) ) {
+        return appendTo;
+    }
+
+    // Heap strings, because Formattable::adoptString() takes ownership.
+    UnicodeString* earlierDate = new UnicodeString();
+    UnicodeString* laterDate = new UnicodeString();
+    if ( earlierDate == NULL || laterDate == NULL ) {
+        // not adopted yet, so they still have to be released here
+        delete earlierDate;
+        delete laterDate;
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return appendTo;
+    }
+
+    // format() appends to its argument, so no assignment is needed
+    fDateFormat->format(fromCalendar, *earlierDate, pos);
+    fDateFormat->format(toCalendar, *laterDate, pos);
+
+    // from here on the Formattables own the two strings
+    Formattable fmtArray[2];
+    fmtArray[0].adoptString(earlierDate);
+    fmtArray[1].adoptString(laterDate);
+
+    const UnicodeString& fallbackPattern = fInfo->getFallbackIntervalPattern();
+    UnicodeString fallback;
+    MessageFormat::format(fallbackPattern, fmtArray, 2, fallback, status);
+    if ( U_SUCCESS(status) ) {
+        appendTo.append(fallback);
+    }
+    return appendTo;
+}
+
+
+
+
+UBool U_EXPORT2
+DateIntervalFormat::fieldExistsInSkeleton(UCalendarDateFields field,
+                                          const UnicodeString& skeleton)
+{
+    // TRUE when the pattern letter mapped to "field" occurs anywhere in
+    // "skeleton", FALSE otherwise.
+    if ( skeleton.indexOf(fgCalendarFieldToPatternLetter[field]) == -1 ) {
+        return FALSE;
+    }
+    return TRUE;
+}
+
+
+
+// Copy bestIntervalPattern into adjustedPtn and widen its fields so that
+// they match the widths requested by inputSkeleton.
+//
+// A run of a pattern letter is widened only when its length equals the
+// width of that letter in bestMatchSkeleton and inputSkeleton asks for a
+// larger width; the missing letters are then inserted after the run.
+// Fields are never narrowed.  When differenceInfo is 2, every "v" in the
+// pattern is first replaced by "z".
+void U_EXPORT2
+DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton,
+ const UnicodeString& bestMatchSkeleton,
+ const UnicodeString& bestIntervalPattern,
+ int8_t differenceInfo,
+ UnicodeString& adjustedPtn) {
+ adjustedPtn = bestIntervalPattern;
+ // per-letter field widths, one slot per code unit from 'A' (0x41)
+ // to 'z' (0x7A), indexed by (letter - PATTERN_CHAR_BASE)
+ int32_t inputSkeletonFieldWidth[] =
+ {
+ // A B C D E F G H I J K L M N O
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // P Q R S T U V W X Y Z
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // a b c d e f g h i j k l m n o
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // p q r s t u v w x y z
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+
+ int32_t bestMatchSkeletonFieldWidth[] =
+ {
+ // A B C D E F G H I J K L M N O
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // P Q R S T U V W X Y Z
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // a b c d e f g h i j k l m n o
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ // p q r s t u v w x y z
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+
+ DateIntervalInfo::parseSkeleton(inputSkeleton, inputSkeletonFieldWidth);
+ DateIntervalInfo::parseSkeleton(bestMatchSkeleton, bestMatchSkeletonFieldWidth);
+ if ( differenceInfo == 2 ) {
+ // the replacement is textual, so a quoted literal "v" is replaced as well
+ adjustedPtn.findAndReplace("v", "z");
+ }
+
+ UBool inQuote = false;
+ UChar prevCh = 0;
+ // length of the current run of the letter held in prevCh
+ int32_t count = 0;
+
+ const int8_t PATTERN_CHAR_BASE = 0x41;
+
+ // loop through the pattern string character by character
+ int32_t adjustedPtnLength = adjustedPtn.length();
+ int32_t i;
+ for (i = 0; i < adjustedPtnLength; ++i) {
+ UChar ch = adjustedPtn.charAt(i);
+ if (ch != prevCh && count > 0) {
+ // a run of prevCh has just ended: widen it if the input skeleton asks for more
+ UChar skeletonChar = prevCh;
+ if ( skeletonChar == CAP_L ) {
+ // there is no "L" (always be "M") in skeleton,
+ // but there is "L" in pattern.
+ // for skeleton "M+", the pattern might be "...L..."
+ skeletonChar = CAP_M;
+ }
+ int32_t fieldCount = bestMatchSkeletonFieldWidth[(int)(skeletonChar - PATTERN_CHAR_BASE)];
+ int32_t inputFieldCount = inputSkeletonFieldWidth[(int)(skeletonChar - PATTERN_CHAR_BASE)];
+ if ( fieldCount == count && inputFieldCount > fieldCount ) {
+ // count is reused as the number of letters to insert
+ count = inputFieldCount - fieldCount;
+ int32_t j;
+ for ( j = 0; j < count; ++j ) {
+ adjustedPtn.insert(i, prevCh);
+ }
+ // ch has moved right by count positions; keep i and the loop bound in step
+ i += count;
+ adjustedPtnLength += count;
+ }
+ count = 0;
+ }
+ if (ch == '\'') {
+ // Consecutive single quotes are a single quote literal,
+ // either outside of quotes or between quotes
+ if ((i+1) < adjustedPtn.length() && adjustedPtn.charAt(i+1) == '\'') {
+ ++i;
+ } else {
+ inQuote = ! inQuote;
+ }
+ }
+ else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/)
+ || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) {
+ // ch is a date-time pattern character
+ prevCh = ch;
+ ++count;
+ }
+ }
+ if ( count > 0 ) {
+ // last item
+ // the pattern ended inside a run: same widening rule, appended at the end
+ UChar skeletonChar = prevCh;
+ if ( skeletonChar == CAP_L ) {
+ // there is no "L" (always be "M") in skeleton,
+ // but there is "L" in pattern.
+ // for skeleton "M+", the pattern might be "...L..."
+ skeletonChar = CAP_M;
+ }
+ int32_t fieldCount = bestMatchSkeletonFieldWidth[(int)(skeletonChar - PATTERN_CHAR_BASE)];
+ int32_t inputFieldCount = inputSkeletonFieldWidth[(int)(skeletonChar - PATTERN_CHAR_BASE)];
+ if ( fieldCount == count && inputFieldCount > fieldCount ) {
+ count = inputFieldCount - fieldCount;
+ int32_t j;
+ for ( j = 0; j < count; ++j ) {
+ adjustedPtn.append(prevCh);
+ }
+ }
+ }
+}
+
+
+
+// Combine the time interval pattern already stored for "field" with a
+// single-date pattern, and store the result as the new interval pattern
+// for "field".
+//
+// @param format      MessageFormat pattern; argument {0} receives the
+//                    time interval pattern and {1} the date pattern
+// @param formatLen   length of "format"
+// @param datePattern single-date pattern supplied as argument {1}
+// @param field       calendar field whose stored pattern is combined
+// @param status      in/out; nothing is done when it holds a failure on
+//                    entry.  Set to U_MEMORY_ALLOCATION_ERROR if a string
+//                    cannot be allocated.
+void
+DateIntervalFormat::concatSingleDate2TimeInterval(const UChar* format,
+                                              int32_t formatLen,
+                                              const UnicodeString& datePattern,
+                                              UCalendarDateFields field,
+                                              UErrorCode& status) {
+    if ( U_FAILURE(status) ) {
+        return;
+    }
+    int32_t itvPtnIndex = DateIntervalInfo::calendarFieldToIntervalIndex(field,
+                                                                        status);
+    if ( U_FAILURE(status) ) {
+        // "field" has no interval slot; do not touch an unrelated one
+        return;
+    }
+    PatternInfo& timeItvPtnInfo = fIntervalPatterns[itvPtnIndex];
+    if ( timeItvPtnInfo.firstPart.isEmpty() ) {
+        // fall back
+        // it should not happen if the interval format defined is valid
+        return;
+    }
+
+    // Heap strings, because Formattable::adoptString() takes ownership.
+    UnicodeString* timeIntervalPattern =
+        new UnicodeString(timeItvPtnInfo.firstPart);
+    UnicodeString* dateStr = new UnicodeString(datePattern);
+    if ( timeIntervalPattern == NULL || dateStr == NULL ) {
+        // not adopted yet, so they still have to be released here
+        delete timeIntervalPattern;
+        delete dateStr;
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    timeIntervalPattern->append(timeItvPtnInfo.secondPart);
+
+    Formattable fmtArray[2];
+    fmtArray[0].adoptString(timeIntervalPattern);
+    fmtArray[1].adoptString(dateStr);
+
+    UnicodeString combinedPattern;
+    MessageFormat::format(UnicodeString(TRUE, format, formatLen),
+                          fmtArray, 2, combinedPattern, status);
+    if ( U_FAILURE(status) ) {
+        return;
+    }
+    setIntervalPattern(field, combinedPattern, timeItvPtnInfo.laterDateFirst);
+}
+
+
+
+// Pattern letter for a calendar field, indexed by the UCalendarDateFields
+// value (see the fgCalendarFieldToPatternLetter[field] lookups in this file).
+// The table has 13 entries; the comment in front of each row lists the
+// letters of that row.  Presumably the entries correspond to UCAL_ERA
+// through UCAL_MINUTE - confirm against the UCalendarDateFields enum.
+// A larger field value would index past the end of the table.
+const UChar
+DateIntervalFormat::fgCalendarFieldToPatternLetter[] =
+{
+ /*GyM*/ CAP_G, LOW_Y, CAP_M,
+ /*wWd*/ LOW_W, CAP_W, LOW_D,
+ /*DEF*/ CAP_D, CAP_E, CAP_F,
+ /*ahH*/ LOW_A, LOW_H, CAP_H,
+ /*m..*/ LOW_M,
+};
+
+
+U_NAMESPACE_END
+
+#endif
Added: trunk/source/i18n/dtitvinf.cpp
===================================================================
--- trunk/source/i18n/dtitvinf.cpp (rev 0)
+++ trunk/source/i18n/dtitvinf.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -0,0 +1,599 @@
+/*******************************************************************************
+* Copyright (C) 2008, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*
+* File DTITVINF.CPP
+*
+*******************************************************************************
+*/
+
+
+//FIXME: how to define it at compile time
+//#define DTITVINF_DEBUG 0
+
+
+#ifdef DTITVINF_DEBUG
+#include <iostream>
+#endif
+
+#include "cstring.h"
+#include "unicode/msgfmt.h"
+#include "unicode/dtitvinf.h"
+#include "dtitv_impl.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+
+U_NAMESPACE_BEGIN
+
+
+#ifdef DTITVINF_DEBUG
+#define PRINTMESG(msg) { std::cout << "(" << __FILE__ << ":" << __LINE__ << ") " << msg << "\n"; }
+#endif
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateIntervalInfo)
+
+// resource keys read by DateIntervalInfo::initializeData()
+static const char gIntervalDateTimePatternTag[]="IntervalDateTimePatterns";
+static const char gFallbackPatternTag[]="Fallback";
+
+// NOTE: the UChar arrays below end with an explicit 0, so
+// sizeof(x)/sizeof(x[0]) counts that terminator as well.
+
+// {0}
+static const UChar gFirstPattern[] = {LEFT_CURLY_BRACKET, DIGIT_ZERO, RIGHT_CURLY_BRACKET, 0};
+// {1}
+static const UChar gSecondPattern[] = {LEFT_CURLY_BRACKET, DIGIT_ONE, RIGHT_CURLY_BRACKET, 0};
+
+// default fall-back: "{0} <en dash> {1}"
+static const UChar gDefaultFallbackPattern[] = {LEFT_CURLY_BRACKET, DIGIT_ZERO, RIGHT_CURLY_BRACKET, SPACE, EN_DASH, SPACE, LEFT_CURLY_BRACKET, DIGIT_ONE, RIGHT_CURLY_BRACKET, 0};
+
+
+
+DateIntervalInfo::DateIntervalInfo(UErrorCode& status)
+:   fFallbackIntervalPattern(gDefaultFallbackPattern),
+    fFirstDateInPtnIsLaterDate(false),
+    fIntervalPatterns(NULL)
+{
+    // Start from the built-in fall-back pattern.  The (empty) skeleton
+    // table is only created when the caller has no pending failure.
+    if ( U_SUCCESS(status) ) {
+        fIntervalPatterns = initHash(status);
+    }
+}
+
+
+
+DateIntervalInfo::DateIntervalInfo(const Locale& locale, UErrorCode& status)
+:   fFallbackIntervalPattern(gDefaultFallbackPattern),
+    fFirstDateInPtnIsLaterDate(false),
+    fIntervalPatterns(NULL)
+{
+    // Start from the built-in fall-back pattern, then load the data for
+    // "locale" unless the caller already has a pending failure.
+    if ( U_SUCCESS(status) ) {
+        initializeData(locale, status);
+    }
+}
+
+
+
+void
+DateIntervalInfo::setIntervalPattern(const UnicodeString& skeleton,
+                                     UCalendarDateFields lrgDiffCalUnit,
+                                     const UnicodeString& intervalPattern,
+                                     UErrorCode& status) {
+    // Store "intervalPattern" for "skeleton" under the given largest
+    // different calendar unit.  Some units are folded together first:
+    // hour-of-day is stored under both AM_PM and HOUR, and day-of-month
+    // and day-of-week are stored under DATE.
+    if ( U_FAILURE(status) ) {
+        return;
+    }
+
+    switch ( lrgDiffCalUnit ) {
+      case UCAL_HOUR_OF_DAY:
+        setIntervalPatternInternally(skeleton, UCAL_AM_PM, intervalPattern, status);
+        setIntervalPatternInternally(skeleton, UCAL_HOUR, intervalPattern, status);
+        break;
+      case UCAL_DAY_OF_MONTH:
+      case UCAL_DAY_OF_WEEK:
+        setIntervalPatternInternally(skeleton, UCAL_DATE, intervalPattern, status);
+        break;
+      default:
+        setIntervalPatternInternally(skeleton, lrgDiffCalUnit, intervalPattern, status);
+        break;
+    }
+}
+
+
+// Set the fall-back interval pattern and derive the default date order
+// from it: the later date comes first exactly when both placeholders
+// are present and "{0}" occurs after "{1}" in the pattern.
+//
+// @param fallbackPattern pattern containing the "{0}" and "{1}"
+//                        placeholders
+void
+DateIntervalInfo::setFallbackIntervalPattern(
+                                    const UnicodeString& fallbackPattern) {
+    // gFirstPattern and gSecondPattern are 0-terminated arrays; the
+    // terminator must not be part of the text that is searched for,
+    // otherwise the placeholders can never be found.
+    const int32_t firstPatternLength =
+        (int32_t)(sizeof(gFirstPattern)/sizeof(gFirstPattern[0])) - 1;
+    const int32_t secondPatternLength =
+        (int32_t)(sizeof(gSecondPattern)/sizeof(gSecondPattern[0])) - 1;
+
+    int32_t firstPatternIndex = fallbackPattern.indexOf(gFirstPattern,
+                                                      firstPatternLength, 0);
+    int32_t secondPatternIndex = fallbackPattern.indexOf(gSecondPattern,
+                                                       secondPatternLength, 0);
+
+    // Assign in both directions so that a later call with a pattern in
+    // the usual order resets the flag.  A missing placeholder (index -1)
+    // never counts as "later date first".
+    fFirstDateInPtnIsLaterDate = ( secondPatternIndex >= 0 &&
+                                   firstPatternIndex > secondPatternIndex );
+    fFallbackIntervalPattern = fallbackPattern;
+}
+
+
+
+// Copy constructor: starts with no skeleton table and lets operator=
+// perform the deep copy.
+// NOTE(review): fFirstDateInPtnIsLaterDate is not in the initializer
+// list; it is only assigned when operator= runs to completion.
+DateIntervalInfo::DateIntervalInfo(const DateIntervalInfo& dtitvinf)
+: UObject(dtitvinf),
+ fIntervalPatterns(NULL)
+{
+ *this = dtitvinf;
+}
+
+
+
+DateIntervalInfo&
+DateIntervalInfo::operator=(const DateIntervalInfo& dtitvinf) {
+    // Deep copy.  The current skeleton table is discarded and rebuilt
+    // from the source; the fall-back pattern and the date order are only
+    // copied once the table has been rebuilt without error.
+    if ( this != &dtitvinf ) {
+        UErrorCode status = U_ZERO_ERROR;
+        deleteHash(fIntervalPatterns);
+        fIntervalPatterns = initHash(status);
+        if ( U_SUCCESS(status) ) {
+            copyHash(dtitvinf.fIntervalPatterns, fIntervalPatterns, status);
+        }
+        if ( U_SUCCESS(status) ) {
+            fFallbackIntervalPattern = dtitvinf.fFallbackIntervalPattern;
+            fFirstDateInPtnIsLaterDate = dtitvinf.fFirstDateInPtnIsLaterDate;
+        }
+    }
+    return *this;
+}
+
+
+// Return a heap-allocated copy of this object made with the copy
+// constructor; the result of new is returned unchecked.
+DateIntervalInfo*
+DateIntervalInfo::clone() const {
+ return new DateIntervalInfo(*this);
+}
+
+
+// Destructor: releases the skeleton table together with the pattern
+// arrays stored in it.
+DateIntervalInfo::~DateIntervalInfo() {
+ deleteHash(fIntervalPatterns);
+ fIntervalPatterns = NULL;
+}
+
+
+UBool
+DateIntervalInfo::operator==(const DateIntervalInfo& other) const {
+    // Two objects are equal when the fall-back pattern, the default date
+    // order and the skeleton tables are all equal.  The cheap members
+    // are compared first; the tables are only compared when they match.
+    if ( !(fFallbackIntervalPattern == other.fFallbackIntervalPattern) ) {
+        return FALSE;
+    }
+    if ( !(fFirstDateInPtnIsLaterDate == other.fFirstDateInPtnIsLaterDate) ) {
+        return FALSE;
+    }
+    return fIntervalPatterns->equals(*(other.fIntervalPatterns));
+}
+
+
+const UnicodeString*
+DateIntervalInfo::getIntervalPattern(const UnicodeString& skeleton,
+                                     UCalendarDateFields field,
+                                     UErrorCode& status) const {
+    // Look up the interval pattern stored for "skeleton" and "field".
+    // Returns NULL when "status" holds a failure, when the skeleton is
+    // unknown, when "field" has no interval index, or when the stored
+    // pattern is empty.
+    if ( U_FAILURE(status) ) {
+        return NULL;
+    }
+
+    const UnicodeString* patterns =
+        (UnicodeString*) fIntervalPatterns->get(skeleton);
+    if ( patterns == NULL ) {
+        return NULL;
+    }
+
+    int8_t slot = (int8_t)calendarFieldToIntervalIndex(field, status);
+    if ( U_FAILURE(status) ) {
+        return NULL;
+    }
+
+    const UnicodeString& candidate = patterns[slot];
+    if ( candidate.isEmpty() ) {
+        return NULL;
+    }
+    return &candidate;
+}
+
+
+// Create the skeleton table and fill it from the calendar data of
+// "locale": first the fall-back pattern (it establishes the default date
+// order), then every interval pattern found under each skeleton.
+//
+// @param locale locale whose interval patterns are loaded
+// @param status in/out error code.  Loading stops at the first failure;
+//               entries stored before the failure are kept.
+void
+DateIntervalInfo::initializeData(const Locale& locale, UErrorCode& status)
+{
+    fIntervalPatterns = initHash(status);
+    if ( U_FAILURE(status) ) {
+        return;
+    }
+    CalendarData* calData = new CalendarData(locale, NULL, status);
+    if ( calData == NULL ) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    const UResourceBundle* itvDtPtnResource = calData->getByKey(
+                                       gIntervalDateTimePatternTag, status);
+
+    // look for fallback first, since it establishes the default order
+    int32_t resStrLen = 0;
+    const UChar* resStr = ures_getStringByKeyWithFallback(itvDtPtnResource,
+                                                          gFallbackPatternTag,
+                                                          &resStrLen, &status);
+    if ( U_FAILURE(status) ) {
+        delete calData;
+        return;
+    }
+
+    UnicodeString fallbackPattern = UnicodeString(TRUE, resStr, resStrLen);
+    setFallbackIntervalPattern(fallbackPattern);
+
+    int32_t size = ures_getSize(itvDtPtnResource);
+    for ( int32_t index = 0; index < size && U_SUCCESS(status); ++index ) {
+        UResourceBundle* oneRes = ures_getByIndex(itvDtPtnResource, index,
+                                                  NULL, &status);
+        if ( U_FAILURE(status) ) {
+            break;
+        }
+
+        // NOTE(review): the key is used after oneRes is closed; this
+        // assumes the key storage belongs to the parent bundle - confirm.
+        const char* skeleton = ures_getKey(oneRes);
+        ures_close(oneRes);
+        if ( skeleton == NULL ) {
+            status = U_MISSING_RESOURCE_ERROR;
+            break;
+        }
+        if ( uprv_strcmp(skeleton, gFallbackPatternTag) == 0 ) {
+            continue;  // fallback, already handled above
+        }
+
+        UResourceBundle* intervalPatterns = ures_getByKey(itvDtPtnResource,
+                                                    skeleton, NULL, &status);
+        if ( U_FAILURE(status) ) {
+            break;
+        }
+        // stop if interval patterns for skeleton not found
+        if ( intervalPatterns == NULL ) {
+            status = U_MISSING_RESOURCE_ERROR;
+            break;
+        }
+
+        int32_t ptnNum = ures_getSize(intervalPatterns);
+        for ( int32_t ptnIndex = 0; ptnIndex < ptnNum; ++ptnIndex ) {
+            const char* key = NULL;
+            int32_t ptLength = 0;
+            const UChar* ptnChars = ures_getNextString(intervalPatterns,
+                                                 &ptLength, &key, &status);
+            if ( U_FAILURE(status) ) {
+                // leave the loop instead of returning, so that
+                // intervalPatterns is closed below
+                break;
+            }
+            if ( key == NULL ) {
+                continue;  // nothing to map this pattern to
+            }
+
+            UCalendarDateFields calendarField = UCAL_FIELD_COUNT;
+            if ( !uprv_strcmp(key, "y") ) {
+                calendarField = UCAL_YEAR;
+            } else if ( !uprv_strcmp(key, "M") ) {
+                calendarField = UCAL_MONTH;
+            } else if ( !uprv_strcmp(key, "d") ) {
+                calendarField = UCAL_DATE;
+            } else if ( !uprv_strcmp(key, "a") ) {
+                calendarField = UCAL_AM_PM;
+            } else if ( !uprv_strcmp(key, "h") ) {
+                calendarField = UCAL_HOUR;
+            } else if ( !uprv_strcmp(key, "m") ) {
+                calendarField = UCAL_MINUTE;
+            }
+            // patterns stored under any other key are ignored
+            if ( calendarField != UCAL_FIELD_COUNT ) {
+                setIntervalPatternInternally(skeleton, calendarField,
+                                             ptnChars, status);
+            }
+        }
+        // closed on the error path as well
+        ures_close(intervalPatterns);
+    }
+    delete calData;
+}
+
+
+
+// Store "intervalPattern" in the slot of "lrgDiffCalUnit" for "skeleton".
+// A new array of kIPI_MAX_INDEX patterns is created and put into the
+// table the first time a skeleton is seen.
+//
+// @param skeleton        skeleton the pattern belongs to
+// @param lrgDiffCalUnit  largest different calendar unit; must have an
+//                        interval index
+// @param intervalPattern pattern to store
+// @param status          in/out error code; set to
+//                        U_MEMORY_ALLOCATION_ERROR if the pattern array
+//                        cannot be allocated
+void
+DateIntervalInfo::setIntervalPatternInternally(const UnicodeString& skeleton,
+                                      UCalendarDateFields lrgDiffCalUnit,
+                                      const UnicodeString& intervalPattern,
+                                      UErrorCode& status) {
+    int8_t index = (int8_t)calendarFieldToIntervalIndex(lrgDiffCalUnit,status);
+    if ( U_FAILURE(status) ) {
+        return;
+    }
+    UnicodeString* patternsOfOneSkeleton =
+        (UnicodeString*)(fIntervalPatterns->get(skeleton));
+    UBool isNewEntry = FALSE;
+    if ( patternsOfOneSkeleton == NULL ) {
+        patternsOfOneSkeleton = new UnicodeString[kIPI_MAX_INDEX];
+        if ( patternsOfOneSkeleton == NULL ) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        isNewEntry = TRUE;
+    }
+
+    patternsOfOneSkeleton[index] = intervalPattern;
+    if ( isNewEntry ) {
+        // an existing array is already in the table and was updated in place
+        fIntervalPatterns->put(skeleton, patternsOfOneSkeleton, status);
+    }
+}
+
+
+
+// Count how often each pattern letter occurs in "skeleton".
+//
+// @param skeleton           skeleton to parse
+// @param skeletonFieldWidth in/out table with one slot per code unit from
+//                           'A' (0x41) to 'z' (0x7A), indexed by
+//                           (letter - 0x41); the counts are added to the
+//                           values already in the table.  Every caller in
+//                           this file passes a 58-slot table.
+void
+DateIntervalInfo::parseSkeleton(const UnicodeString& skeleton,
+                                int32_t* skeletonFieldWidth) {
+    const UChar PATTERN_CHAR_BASE = 0x41;   // 'A', slot 0
+    const UChar PATTERN_CHAR_LAST = 0x7A;   // 'z', last slot
+    int32_t i;
+    for ( i = 0; i < skeleton.length(); ++i ) {
+        UChar ch = skeleton.charAt(i);
+        // Anything outside 'A'..'z' has no slot; counting it would write
+        // outside the caller's table.
+        if ( ch < PATTERN_CHAR_BASE || ch > PATTERN_CHAR_LAST ) {
+            continue;
+        }
+        ++skeletonFieldWidth[ch - PATTERN_CHAR_BASE];
+    }
+}
+
+
+
+UBool
+DateIntervalInfo::stringNumeric(int32_t fieldWidth, int32_t anotherFieldWidth,
+                                char patternLetter) {
+    // Only the month letter 'M' is considered.  TRUE when exactly one of
+    // the two widths is at most 2, i.e. the two widths lie on different
+    // sides of the width-2 boundary.
+    if ( patternLetter != 'M' ) {
+        return false;
+    }
+    const UBool shortWidth = ( fieldWidth <= 2 );
+    const UBool otherShortWidth = ( anotherFieldWidth <= 2 );
+    return shortWidth != otherShortWidth;
+}
+
+
+
+// Find the stored skeleton that is closest to "skeleton".
+//
+// The distance between two skeletons is the sum over all pattern letters
+// of: DIFFERENT_FIELD when only one of them has the letter,
+// STRING_NUMERIC_DIFFERENCE when stringNumeric() reports a difference,
+// and the difference of the widths otherwise.
+//
+// @param skeleton              input skeleton; a 'z' is matched as 'v'
+// @param bestMatchDistanceInfo receives
+//                              0 when the best match equals the input,
+//                              1 when only field widths differ,
+//                              2 when 'z' had to be replaced by 'v',
+//                              -1 when the fields differ or when no
+//                              skeleton is stored at all
+// @return the best matching stored skeleton, or NULL when the table is
+//         empty
+const UnicodeString*
+DateIntervalInfo::getBestSkeleton(const UnicodeString& skeleton,
+                                  int8_t& bestMatchDistanceInfo) const {
+#ifdef DTITVINF_DEBUG
+    char result[1000];
+    char mesg[2000];
+    skeleton.extract(0, skeleton.length(), result, "UTF-8");
+    sprintf(mesg, "in getBestSkeleton: skeleton: %s; \n", result);
+    PRINTMESG(mesg)
+#endif
+
+    const int32_t DIFFERENT_FIELD = 0x1000;
+    const int32_t STRING_NUMERIC_DIFFERENCE = 0x100;
+    const int32_t BASE = 0x41;              // 'A', slot 0 of the tables
+    const UChar CHAR_V = 0x0076;
+    const UChar CHAR_Z = 0x007A;
+    // one slot per code unit from 'A' (0x41) to 'z' (0x7A)
+    enum { kFieldSlots = 0x7A - 0x41 + 1 };
+
+    int32_t inputSkeletonFieldWidth[kFieldSlots] = { 0 };
+    int32_t skeletonFieldWidth[kFieldSlots] = { 0 };
+
+    // hack for 'v' and 'z'.
+    // resource bundle only have time skeletons ending with 'v',
+    // but not for time skeletons ending with 'z'.
+    UBool replaceZWithV = false;
+    const UnicodeString* inputSkeleton = &skeleton;
+    UnicodeString copySkeleton;
+    if ( skeleton.indexOf(CHAR_Z) != -1 ) {
+        copySkeleton = skeleton;
+        copySkeleton.findAndReplace(UnicodeString(CHAR_Z),
+                                    UnicodeString(CHAR_V));
+        inputSkeleton = &copySkeleton;
+        replaceZWithV = true;
+    }
+
+    parseSkeleton(*inputSkeleton, inputSkeletonFieldWidth);
+    int32_t bestDistance = MAX_POSITIVE_INT;
+    const UnicodeString* bestSkeleton = NULL;
+
+    // 0 means exact the same skeletons;
+    // 1 means having the same field, but with different length,
+    // 2 means only z/v differs
+    // -1 means having different field.
+    bestMatchDistanceInfo = 0;
+    const int8_t fieldLength = (int8_t)kFieldSlots;
+
+    int32_t pos = -1;
+    const UHashElement* elem = NULL;
+    while ( (elem = fIntervalPatterns->nextElement(pos)) != NULL ) {
+        const UnicodeString* candidate = (UnicodeString*)elem->key.pointer;
+#ifdef DTITVINF_DEBUG
+        candidate->extract(0, candidate->length(), result, "UTF-8");
+        sprintf(mesg, "available skeletons: skeleton: %s; \n", result);
+        PRINTMESG(mesg)
+#endif
+
+        // clear skeleton field width
+        int8_t i;
+        for ( i = 0; i < fieldLength; ++i ) {
+            skeletonFieldWidth[i] = 0;
+        }
+        parseSkeleton(*candidate, skeletonFieldWidth);
+
+        // calculate distance
+        int32_t distance = 0;
+        int8_t fieldDifference = 1;
+        for ( i = 0; i < fieldLength; ++i ) {
+            int32_t inputFieldWidth = inputSkeletonFieldWidth[i];
+            int32_t fieldWidth = skeletonFieldWidth[i];
+            if ( inputFieldWidth == fieldWidth ) {
+                continue;
+            }
+            if ( inputFieldWidth == 0 || fieldWidth == 0 ) {
+                // the letter is present in only one of the two skeletons
+                fieldDifference = -1;
+                distance += DIFFERENT_FIELD;
+            } else if ( stringNumeric(inputFieldWidth, fieldWidth,
+                                      (char)(i+BASE) ) ) {
+                distance += STRING_NUMERIC_DIFFERENCE;
+            } else {
+                distance += (inputFieldWidth > fieldWidth) ?
+                            (inputFieldWidth - fieldWidth) :
+                            (fieldWidth - inputFieldWidth);
+            }
+        }
+        if ( distance < bestDistance ) {
+            bestSkeleton = candidate;
+            bestDistance = distance;
+            bestMatchDistanceInfo = fieldDifference;
+        }
+        if ( distance == 0 ) {
+            bestMatchDistanceInfo = 0;
+            break;
+        }
+    }
+    if ( bestSkeleton == NULL ) {
+        // Nothing is stored: report "different fields" so that callers
+        // which only test for -1 do not dereference the NULL result.
+        bestMatchDistanceInfo = -1;
+    } else if ( replaceZWithV && bestMatchDistanceInfo != -1 ) {
+        bestMatchDistanceInfo = 2;
+    }
+    return bestSkeleton;
+}
+
+
+
+DateIntervalInfo::IntervalPatternIndex
+DateIntervalInfo::calendarFieldToIntervalIndex(UCalendarDateFields field,
+                                               UErrorCode& status) {
+    // Map a calendar field to its slot in a per-skeleton pattern array.
+    // A field without a slot sets "status" to U_ILLEGAL_ARGUMENT_ERROR
+    // and yields kIPI_ERA.
+    switch ( field ) {
+      case UCAL_ERA:
+        return kIPI_ERA;
+      case UCAL_YEAR:
+        return kIPI_YEAR;
+      case UCAL_MONTH:
+        return kIPI_MONTH;
+      case UCAL_DATE:
+      case UCAL_DAY_OF_WEEK:
+      //case UCAL_DAY_OF_MONTH:
+        return kIPI_DATE;
+      case UCAL_AM_PM:
+        return kIPI_AM_PM;
+      case UCAL_HOUR:
+      case UCAL_HOUR_OF_DAY:
+        return kIPI_HOUR;
+      case UCAL_MINUTE:
+        return kIPI_MINUTE;
+      default:
+        break;
+    }
+    status = U_ILLEGAL_ARGUMENT_ERROR;
+    return kIPI_ERA;
+}
+
+
+
+// Release a skeleton table: delete the pattern array stored as the value
+// of every entry, then the table itself.  A NULL table is ignored.
+//
+// @param hTable table to delete; must not be used afterwards
+void
+DateIntervalInfo::deleteHash(Hashtable* hTable)
+{
+    if ( hTable == NULL ) {
+        return;
+    }
+    int32_t pos = -1;
+    const UHashElement* element = NULL;
+    while ( (element = hTable->nextElement(pos)) != NULL ) {
+        const UnicodeString* value = (UnicodeString*)element->value.pointer;
+        delete[] value;
+    }
+    // delete the table that was passed in, not whatever the member
+    // fIntervalPatterns happens to point to
+    delete hTable;
+}
+
+
+/**
+ * Value comparator for the skeleton table.
+ * Each value is an array of DateIntervalInfo::kIPI_MAX_INDEX patterns;
+ * two values are the same when all their patterns are equal.
+ *
+ * @param val1 one value in comparison
+ * @param val2 the other value in comparison
+ * @return TRUE if 2 values are the same, FALSE otherwise
+ */
+static UBool U_CALLCONV hashTableValueComparator(UHashTok val1, UHashTok val2);
+
+UBool
+U_CALLCONV hashTableValueComparator(UHashTok val1, UHashTok val2) {
+    const UnicodeString* lhs = (UnicodeString*)val1.pointer;
+    const UnicodeString* rhs = (UnicodeString*)val2.pointer;
+    for ( int8_t slot = 0; slot < DateIntervalInfo::kIPI_MAX_INDEX; ++slot ) {
+        if ( !(lhs[slot] == rhs[slot]) ) {
+            return FALSE;    // first differing pattern decides
+        }
+    }
+    return TRUE;
+}
+
+
+
+// Create an empty skeleton table whose values are compared with
+// hashTableValueComparator.
+//
+// @param status in/out error code; set to U_MEMORY_ALLOCATION_ERROR when
+//               the table cannot be allocated
+// @return the new table, or NULL when "status" holds a failure on entry
+//         or the table could not be created
+Hashtable*
+DateIntervalInfo::initHash(UErrorCode& status) {
+    if ( U_FAILURE(status) ) {
+        return NULL;
+    }
+    Hashtable* hTable = new Hashtable(TRUE, status);
+    if ( hTable == NULL ) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    if ( U_FAILURE(status) ) {
+        // the constructor reported an error: do not hand out the table
+        delete hTable;
+        return NULL;
+    }
+    hTable->setValueCompartor(hashTableValueComparator);
+    return hTable;
+}
+
+
+// Deep-copy every entry of "source" into "target": each key is copied
+// and each value (an array of kIPI_MAX_INDEX patterns) is duplicated.
+//
+// @param source table to copy from; NULL copies nothing
+// @param target table receiving the copies
+// @param status in/out error code; nothing is copied when it holds a
+//               failure on entry.  Copying stops at the first failure and
+//               U_MEMORY_ALLOCATION_ERROR is set when a pattern array
+//               cannot be allocated.
+void
+DateIntervalInfo::copyHash(const Hashtable* source,
+                           Hashtable* target,
+                           UErrorCode& status) {
+    if ( U_FAILURE(status) || source == NULL ) {
+        return;
+    }
+    int32_t pos = -1;
+    const UHashElement* element = NULL;
+    while ( (element = source->nextElement(pos)) != NULL ) {
+        const UnicodeString* key = (UnicodeString*)element->key.pointer;
+        const UnicodeString* value = (UnicodeString*)element->value.pointer;
+        UnicodeString* copy = new UnicodeString[kIPI_MAX_INDEX];
+        if ( copy == NULL ) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        int8_t i;
+        for ( i = 0; i < kIPI_MAX_INDEX; ++i ) {
+            copy[i] = value[i];
+        }
+        target->put(UnicodeString(*key), copy, status);
+        if ( U_FAILURE(status) ) {
+            return;
+        }
+    }
+}
+
+
+U_NAMESPACE_END
+
+#endif
Modified: trunk/source/i18n/i18n.vcproj
===================================================================
--- trunk/source/i18n/i18n.vcproj 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/i18n.vcproj 2008-05-31 14:31:31 UTC (rev 153)
@@ -805,6 +805,26 @@
</FileConfiguration>
</File>
<File
+ RelativePath=".\dtitv_impl.h"
+ >
+ </File>
+ <File
+ RelativePath=".\dtitvfmt.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\unicode\dtitvfmt.h"
+ >
+ </File>
+ <File
+ RelativePath=".\dtitvinf.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\unicode\dtitvinf.h"
+ >
+ </File>
+ <File
RelativePath=".\dtptngen.cpp"
>
</File>
@@ -1239,6 +1259,10 @@
</FileConfiguration>
</File>
<File
+ RelativePath=".\plurrule_impl.h"
+ >
+ </File>
+ <File
RelativePath=".\rbnf.cpp"
>
</File>
Modified: trunk/source/i18n/search.cpp
===================================================================
--- trunk/source/i18n/search.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/search.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -106,9 +106,21 @@
UErrorCode &status)
{
if (U_SUCCESS(status)) {
+#if 0
m_search_->breakIter = NULL;
// the c++ breakiterator may not make use of ubreakiterator.
// so we'll have to keep track of it ourselves.
+#else
+ // Well, gee... the Constructors that take a BreakIterator
+ // all cast the BreakIterator to a UBreakIterator and
+ // pass it to the corresponding usearch_openFromXXX
+ // routine, so there's no reason not to do this.
+ //
+ // Besides, a UBreakIterator is a BreakIterator, so
+ // any subclass of BreakIterator should work fine here...
+ m_search_->breakIter = (UBreakIterator *) breakiter;
+#endif
+
m_breakiterator_ = breakiter;
}
}
@@ -283,10 +295,16 @@
}
if (matchindex != USEARCH_DONE) {
+ if (m_search_->isOverlap) {
+ matchindex += m_search_->matchedLength - 2;
+ }
+
return handlePrev(matchindex, status);
}
+
return handlePrev(offset, status);
}
+
return USEARCH_DONE;
}
Modified: trunk/source/i18n/smpdtfmt.cpp
===================================================================
--- trunk/source/i18n/smpdtfmt.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/smpdtfmt.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -65,6 +65,8 @@
U_NAMESPACE_BEGIN
+// Code unit that maps to index 0 of fgPatternCharToLevel: 0x40, which is
+// one below 'A' (0x41).  The table is indexed by (letter - PATTERN_CHAR_BASE).
+static const UChar PATTERN_CHAR_BASE = 0x40;
+
/**
* Last-resort string to use for "GMT" when constructing time zone strings.
*/
@@ -580,6 +582,38 @@
//----------------------------------------------------------------------
+/* Map a calendar field to its calendar field level.
+ * The larger the level, the smaller the field unit.
+ * For example, UCAL_ERA level is 0, UCAL_YEAR level is 10,
+ * UCAL_MONTH level is 20.
+ * The table is indexed by the UCalendarDateFields value and has 22
+ * entries; the comment in front of each row lists the pattern letters
+ * of the fields in that row.
+ */
+const int32_t
+SimpleDateFormat::fgCalendarFieldToLevel[] =
+{
+ /*GyM*/ 0, 10, 20,
+ /*wW*/ 20, 30,
+ /*dDEF*/ 30, 20, 30, 30,
+ /*ahHm*/ 40, 50, 50, 60,
+ /*sS..*/ 70, 80,
+ /*z?Y*/ 0, 0, 10,
+ /*eug*/ 30, 10, 0,
+ /*A*/ 40
+};
+
+
+// Level of each pattern letter, on the same scale as
+// fgCalendarFieldToLevel; -1 marks a code unit that has no level.
+// The table is indexed by (letter - PATTERN_CHAR_BASE) with a base of
+// 0x40, so index 0 is the code unit before 'A' and every row holds 16
+// entries: the first value of each row belongs to the code unit just
+// before the first letter named in the row's heading, and the headings
+// are therefore shifted by one column.
+const int32_t
+SimpleDateFormat::fgPatternCharToLevel[] = {
+ // A B C D E F G H I J K L M N O
+ -1, 40, -1, -1, 20, 30, 30, 0, 50, -1, -1, 50, 20, 20, -1, -1,
+ // P Q R S T U V W X Y Z
+ -1, 20, -1, 80, -1, -1, 0, 30, -1, 10, 0, -1, -1, -1, -1, -1,
+ // a b c d e f g h i j k l m n o
+ -1, 40, -1, 30, 30, 30, -1, 0, 50, -1, -1, 50, -1, 60, -1, -1,
+ // p q r s t u v w x y z
+ -1, 20, -1, 70, -1, 10, 0, 20, -1, 10, 0, -1, -1, -1, -1, -1
+};
+
+
// Map index into pattern character string to Calendar field number.
const UCalendarDateFields
SimpleDateFormat::fgPatternIndexToCalendarField[] =
@@ -2427,6 +2461,67 @@
initializeDefaultCentury(); // we need a new century (possibly)
}
+
+//----------------------------------------------------------------------
+
+
+UBool
+SimpleDateFormat::isFieldUnitIgnored(UCalendarDateFields field) const {
+ return isFieldUnitIgnored(fPattern, field);
+}
+
+/* Returns FALSE as soon as a counted (unquoted) pattern letter has a level >= the level of field, else TRUE. */
+UBool
+SimpleDateFormat::isFieldUnitIgnored(const UnicodeString& pattern,
+ UCalendarDateFields field) {
+ int32_t fieldLevel = fgCalendarFieldToLevel[field]; // field indexes the table directly, without a range check
+ int32_t level;
+ UChar ch;
+ UBool inQuote = FALSE;
+ UChar prevCh = 0;
+ int32_t count = 0; // unquoted letters counted since the last level check
+
+ for (int32_t i = 0; i < pattern.length(); ++i) {
+ ch = pattern[i];
+ if (ch != prevCh && count > 0) { // the run of prevCh has ended: test its level
+ level = fgPatternCharToLevel[prevCh - PATTERN_CHAR_BASE];
+ if ( fieldLevel <= level ) {
+ return FALSE; // pattern letter is at the field's level or a larger one
+ }
+ count = 0;
+ }
+ if (ch == QUOTE) {
+ if ((i+1) < pattern.length() && pattern[i+1] == QUOTE) {
+ ++i; // doubled quote: skip the second one, quote state unchanged
+ } else {
+ inQuote = ! inQuote;
+ }
+ }
+ else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/)
+ || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) {
+ prevCh = ch; // only ASCII letters outside quotes are counted
+ ++count;
+ }
+ }
+ if ( count > 0 ) {
+ // last item
+ level = fgPatternCharToLevel[prevCh - PATTERN_CHAR_BASE];
+ if ( fieldLevel <= level ) {
+ return FALSE;
+ }
+ }
+ return TRUE; // no counted letter reached fieldLevel
+}
+
+
+
+const Locale&
+SimpleDateFormat::getSmpFmtLocale(void) const {
+ return fLocale;
+}
+
+
+
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
Modified: trunk/source/i18n/stsearch.cpp
===================================================================
--- trunk/source/i18n/stsearch.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/stsearch.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -350,11 +350,13 @@
// looking at usearch.cpp, this part is shifted out to
// StringSearch instead of SearchIterator because m_strsrch_ is
// not accessible in SearchIterator
+#if 0
if (position + m_strsrch_->pattern.defaultShiftSize
> m_search_->textLength) {
setMatchNotFound();
return USEARCH_DONE;
}
+#endif
if (m_search_->matchedLength <= 0) {
// the flipping direction issue has already been handled
// in next()
@@ -366,6 +368,8 @@
}
ucol_setOffset(m_strsrch_->textIter, position, &status);
+
+#if 0
for (;;) {
if (m_search_->isCanonicalMatch) {
// can't use exact here since extra accents are allowed.
@@ -397,6 +401,29 @@
return m_search_->matchedIndex;
}
}
+#else
+ // if m_strsrch_->breakIter is always the same as m_breakiterator_
+ // then we don't need to check the match boundaries here because
+ // usearch_handleNextXXX will already have done it.
+ if (m_search_->isCanonicalMatch) {
+ // *could* actually use exact here 'cause no extra accents allowed...
+ usearch_handleNextCanonical(m_strsrch_, &status);
+ } else {
+ usearch_handleNextExact(m_strsrch_, &status);
+ }
+
+ if (U_FAILURE(status)) {
+ return USEARCH_DONE;
+ }
+
+ if (m_search_->matchedIndex == USEARCH_DONE) {
+ ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
+ } else {
+ ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
+ }
+
+ return m_search_->matchedIndex;
+#endif
}
}
return USEARCH_DONE;
@@ -424,11 +451,13 @@
// looking at usearch.cpp, this part is shifted out to
// StringSearch instead of SearchIterator because m_strsrch_ is
// not accessible in SearchIterator
+#if 0
if (!m_search_->isOverlap &&
position - m_strsrch_->pattern.defaultShiftSize < 0) {
setMatchNotFound();
return USEARCH_DONE;
}
+
for (;;) {
if (m_search_->isCanonicalMatch) {
// can't use exact here since extra accents are allowed.
@@ -452,6 +481,22 @@
return m_search_->matchedIndex;
}
}
+#else
+ ucol_setOffset(m_strsrch_->textIter, position, &status);
+
+ if (m_search_->isCanonicalMatch) {
+ // *could* use exact match here since extra accents *not* allowed!
+ usearch_handlePreviousCanonical(m_strsrch_, &status);
+ } else {
+ usearch_handlePreviousExact(m_strsrch_, &status);
+ }
+
+ if (U_FAILURE(status)) {
+ return USEARCH_DONE;
+ }
+
+ return m_search_->matchedIndex;
+#endif
}
return m_search_->matchedIndex;
Modified: trunk/source/i18n/ucal.cpp
===================================================================
--- trunk/source/i18n/ucal.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/ucal.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -139,6 +139,22 @@
delete (Calendar*) cal;
}
+/* Clone cal; returns 0 if *status is already a failure, or if clone() yields 0 (sets U_MEMORY_ALLOCATION_ERROR). */
+U_CAPI UCalendar* U_EXPORT2
+ucal_clone(const UCalendar* cal,
+           UErrorCode* status)
+{
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    Calendar* copy = ((Calendar*)cal)->clone();
+    if (copy != 0) {
+        return (UCalendar*) copy;
+    }
+    *status = U_MEMORY_ALLOCATION_ERROR;
+    return 0;
+}
+
U_CAPI void U_EXPORT2
ucal_setTimeZone( UCalendar* cal,
const UChar* zoneID,
Modified: trunk/source/i18n/ucol.cpp
===================================================================
--- trunk/source/i18n/ucol.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/ucol.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -101,6 +101,10 @@
(s)->extendCEs = NULL;
(s)->extendCEsSize = 0;
(s)->CEpos = (s)->toReturn = (s)->CEs;
+ (s)->offsetBuffer = NULL;
+ (s)->offsetBufferSize = 0;
+ (s)->offsetReturn = (s)->offsetStore = NULL;
+ (s)->offsetRepeatCount = (s)->offsetRepeatValue = 0;
(s)->writableBuffer = (s)->stackWritableBuffer;
(s)->writableBufSize = UCOL_WRITABLE_BUFFER_SIZE;
(s)->coll = (collator);
@@ -175,6 +179,7 @@
}
}
data->pos = backup->pos;
+
if ((data->flags & UCOL_ITER_INNORMBUF) &&
data->writableBuffer != backup->bufferaddress) {
/*
@@ -1377,6 +1382,7 @@
}
UChar ch = 0;
+ collationSource->offsetReturn = NULL;
for (;;) /* Loop handles case when incremental normalize switches */
{ /* to or from the side buffer / original string, and we */
@@ -1586,6 +1592,83 @@
unorm_normalize(pStart, (pEnd - pStart) + 1, UNORM_NFD, 0, pStartNorm,
normLen, &status);
+ if (data->offsetBuffer == NULL) {
+ int32_t len = normLen >= UCOL_EXPAND_CE_BUFFER_SIZE ? normLen + 1 : UCOL_EXPAND_CE_BUFFER_SIZE;
+
+ data->offsetBufferSize = len;
+ data->offsetBuffer = (int32_t *) uprv_malloc(sizeof(int32_t) * len);
+ data->offsetStore = data->offsetBuffer;
+ } else if(data->offsetBufferSize < (int32_t) normLen) {
+ int32_t storeIX = data->offsetStore - data->offsetBuffer;
+ int32_t *tob = (int32_t *) uprv_realloc(data->offsetBuffer, sizeof(int32_t) * (normLen + 1));
+
+ if (tob != NULL) {
+ data->offsetBuffer = tob;
+ data->offsetStore = &data->offsetBuffer[storeIX];
+ data->offsetBufferSize = normLen + 1;
+ }
+ }
+
+ /*
+ * The usual case at this point is that we've got a base
+ * character followed by marks that were normalized. If
+ * fcdPosition is NULL, that means that we backed up to
+ * the beginning of the string and there's no base character.
+ *
+ * Forward processing will usually normalize when it sees
+ * the first mark, so that mark will get its natural offset
+ * and the rest will get the offset of the character following
+ * the marks. The base character will also get its natural offset.
+ *
+ * We write the offset of the base character, if there is one,
+ * followed by the offset of the first mark and then the offsets
+ * of the rest of the marks.
+ */
+ int32_t firstMarkOffset = 0;
+ int32_t trailOffset = data->pos - data->string + 1;
+ int32_t trailCount = normLen - 1;
+
+ if (data->fcdPosition != NULL) {
+ int32_t baseOffset = data->fcdPosition - data->string;
+ UChar baseChar = *data->fcdPosition;
+
+ firstMarkOffset = baseOffset + 1;
+
+ /*
+ * If the base character is the start of a contraction, forward processing
+ * will normalize the marks while checking for the contraction, which means
+ * that the offset of the first mark will be the same as the other marks.
+ *
+ * **** THIS IS PROBABLY NOT A COMPLETE TEST ****
+ */
+ if (baseChar >= 0x100) {
+ uint32_t baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->mapping, baseChar);
+
+ if (baseOrder == UCOL_NOT_FOUND && data->coll->UCA) {
+ baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->UCA->mapping, baseChar);
+ }
+
+ if (baseOrder > UCOL_NOT_FOUND && getCETag(baseOrder) == CONTRACTION_TAG) {
+ firstMarkOffset = trailOffset;
+ }
+ }
+
+ *(data->offsetStore++) = baseOffset;
+ }
+
+ *(data->offsetStore++) = firstMarkOffset;
+
+ for (int32_t i = 0; i < trailCount; i += 1) {
+ *(data->offsetStore++) = trailOffset;
+ }
+
+ data->offsetRepeatValue = trailOffset;
+
+ data->offsetReturn = data->offsetStore - 1;
+ if (data->offsetReturn == data->offsetBuffer) {
+ data->offsetStore = data->offsetBuffer;
+ }
+
data->pos = data->writableBuffer + data->writableBufSize;
data->origFlags = data->flags;
data->flags |= UCOL_ITER_INNORMBUF;
@@ -1756,10 +1839,24 @@
UErrorCode *status)
{
uint32_t result = (uint32_t)UCOL_NULLORDER;
+
+ if (data->offsetReturn != NULL) {
+ if (data->offsetRepeatCount > 0) {
+ data->offsetRepeatCount -= 1;
+ } else {
+ if (data->offsetReturn == data->offsetBuffer) {
+ data->offsetReturn = NULL;
+ data->offsetStore = data->offsetBuffer;
+ } else {
+ data->offsetReturn -= 1;
+ }
+ }
+ }
+
if ((data->extendCEs && data->toReturn > data->extendCEs) ||
(!data->extendCEs && data->toReturn > data->CEs))
{
- data->toReturn --;
+ data->toReturn -= 1;
result = *(data->toReturn);
if (data->CEs == data->toReturn || data->extendCEs == data->toReturn) {
data->CEpos = data->toReturn;
@@ -1767,6 +1864,7 @@
}
else {
UChar ch = 0;
+
/*
Loop handles case when incremental normalize switches to or from the
side buffer / original string, and we need to start again to get the
@@ -1805,15 +1903,18 @@
Because pointer points to the last accessed character,
hence we have to increment it by one here.
*/
- if (data->fcdPosition == NULL) {
+ data->flags = data->origFlags;
+ data->offsetRepeatValue = 0;
+
+ if (data->fcdPosition == NULL) {
data->pos = data->string;
return UCOL_NO_MORE_CES;
}
else {
data->pos = data->fcdPosition + 1;
}
- data->flags = data->origFlags;
- continue;
+
+ continue;
}
}
@@ -1903,10 +2004,12 @@
}
}
}
+
if(result == UCOL_NOT_FOUND) {
result = getPrevImplicit(ch, data);
}
}
+
return result;
}
@@ -2399,6 +2502,7 @@
}
uint32_t r = uprv_uca_getImplicitPrimary(cp);
*(collationSource->CEpos++) = ((r & 0x0000FFFF)<<16) | 0x000000C0;
+ collationSource->offsetRepeatCount += 1;
return (r & UCOL_PRIMARYMASK) | 0x00000505; // This was 'order'
}
@@ -2871,6 +2975,7 @@
{
*(source->CEpos++) = ((CE & 0xFF)<<24)|UCOL_CONTINUATION_MARKER;
CE = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON << 8) | UCOL_BYTE_COMMON;
+ source->offsetRepeatCount += 1;
return CE;
}
case EXPANSION_TAG:
@@ -2880,18 +2985,24 @@
/* I have to decide where continuations are going to be dealt with */
uint32_t size;
uint32_t i; /* general counter */
+
CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
size = getExpansionCount(CE);
CE = *CEOffset++;
+ //source->offsetRepeatCount = -1;
+
if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
for(i = 1; i<size; i++) {
*(source->CEpos++) = *CEOffset++;
+ source->offsetRepeatCount += 1;
}
} else { /* else, we do */
while(*CEOffset != 0) {
*(source->CEpos++) = *CEOffset++;
+ source->offsetRepeatCount += 1;
}
}
+
return CE;
}
case DIGIT_TAG:
@@ -3263,6 +3374,29 @@
*(collationSource->CEpos++) = (r & UCOL_PRIMARYMASK) | 0x00000505;
collationSource->toReturn = collationSource->CEpos;
+
+ if (collationSource->offsetBuffer == NULL) {
+ collationSource->offsetBufferSize = UCOL_EXPAND_CE_BUFFER_SIZE;
+ collationSource->offsetBuffer = (int32_t *) uprv_malloc(sizeof(int32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
+ collationSource->offsetStore = collationSource->offsetBuffer;
+ }
+
+ // **** doesn't work if using iterator ****
+ if (collationSource->flags & UCOL_ITER_INNORMBUF) {
+ collationSource->offsetRepeatCount = 1;
+ } else {
+ int32_t firstOffset = (int32_t)(collationSource->pos - collationSource->string);
+
+ *(collationSource->offsetStore++) = firstOffset;
+ *(collationSource->offsetStore++) = firstOffset + 1;
+
+ collationSource->offsetReturn = collationSource->offsetStore - 1;
+ *(collationSource->offsetBuffer) = firstOffset;
+ if (collationSource->offsetReturn == collationSource->offsetBuffer) {
+ collationSource->offsetStore = collationSource->offsetBuffer;
+ }
+ }
+
return ((r & 0x0000FFFF)<<16) | 0x000000C0;
}
@@ -3293,6 +3427,7 @@
{
case NOT_FOUND_TAG: /* this tag always returns */
return CE;
+
case SPEC_PROC_TAG:
{
// Special processing is getting a CE that is preceded by a certain prefix
@@ -3450,15 +3585,54 @@
*(UCharOffset) = schar;
noChars++;
+ int32_t offsetBias;
+
+#if 0
+ if (source->offsetReturn != NULL) {
+ source->offsetStore = source->offsetReturn - noChars;
+ }
+
+ // **** doesn't work if using iterator ****
+ if (source->flags & UCOL_ITER_INNORMBUF) {
+ if (source->fcdPosition == NULL) {
+ offsetBias = 0;
+ } else {
+ offsetBias = (int32_t)(source->fcdPosition - source->string);
+ }
+ } else {
+ offsetBias = (int32_t)(source->pos - source->string);
+ }
+
+#else
+ // **** doesn't work if using iterator ****
+ if (source->flags & UCOL_ITER_INNORMBUF) {
+#if 1
+ offsetBias = -1;
+#else
+ if (source->fcdPosition == NULL) {
+ offsetBias = 0;
+ } else {
+ offsetBias = (int32_t)(source->fcdPosition - source->string);
+ }
+#endif
+ } else {
+ offsetBias = (int32_t)(source->pos - source->string);
+ }
+#endif
+
/* a new collIterate is used to simplify things, since using the current
collIterate will mean that the forward and backwards iteration will
share and change the same buffers. we don't want to get into that. */
collIterate temp;
+ int32_t rawOffset;
+
//IInit_collIterate(coll, UCharOffset, -1, &temp);
IInit_collIterate(coll, UCharOffset, noChars, &temp);
temp.flags &= ~UCOL_ITER_NORM;
+ rawOffset = temp.pos - temp.string; // should always be zero?
CE = ucol_IGetNextCE(coll, &temp, status);
+
if (source->extendCEs) {
endCEBuffer = source->extendCEs + source->extendCEsSize;
CECount = (source->CEpos - source->extendCEs)/sizeof(uint32_t);
@@ -3466,8 +3640,20 @@
endCEBuffer = source->CEs + UCOL_EXPAND_CE_BUFFER_SIZE;
CECount = (source->CEpos - source->CEs)/sizeof(uint32_t);
}
+
+ if (source->offsetBuffer == NULL) {
+ source->offsetBufferSize = UCOL_EXPAND_CE_BUFFER_SIZE;
+ source->offsetBuffer = (int32_t *) uprv_malloc(sizeof(int32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
+ source->offsetStore = source->offsetBuffer;
+ }
+
while (CE != UCOL_NO_MORE_CES) {
*(source->CEpos ++) = CE;
+
+ if (offsetBias >= 0) {
+ *(source->offsetStore ++) = rawOffset + offsetBias;
+ }
+
CECount++;
if (source->CEpos == endCEBuffer) {
/* ran out of CE space, reallocate to new buffer.
@@ -3494,43 +3680,135 @@
source->extendCEs = tempBufCE;
}
}
+
if (CECount == -1) {
*status = U_MEMORY_ALLOCATION_ERROR;
source->extendCEsSize = 0;
source->CEpos = source->CEs;
freeHeapWritableBuffer(&temp);
+
if (strbuffer != buffer) {
uprv_free(strbuffer);
}
+
return (uint32_t)UCOL_NULLORDER;
}
+
source->CEpos = source->extendCEs + CECount;
endCEBuffer = source->extendCEs + source->extendCEsSize;
}
+
+ if (offsetBias >= 0 && source->offsetStore >= &source->offsetBuffer[source->offsetBufferSize]) {
+ int32_t storeIX = source->offsetStore - source->offsetBuffer;
+ int32_t *tob = (int32_t *) uprv_realloc(source->offsetBuffer,
+ sizeof(int32_t) * (source->offsetBufferSize + UCOL_EXPAND_CE_BUFFER_EXTEND_SIZE));
+
+ if (tob != NULL) {
+ source->offsetBuffer = tob;
+ source->offsetStore = &source->offsetBuffer[storeIX];
+ source->offsetBufferSize += UCOL_EXPAND_CE_BUFFER_EXTEND_SIZE;
+ } else {
+ // memory error...
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ source->CEpos = source->CEs;
+ freeHeapWritableBuffer(&temp);
+
+ if (strbuffer != buffer) {
+ uprv_free(strbuffer);
+ }
+
+ return (uint32_t) UCOL_NULLORDER;
+ }
+ }
+
+ rawOffset = temp.pos - temp.string;
CE = ucol_IGetNextCE(coll, &temp, status);
}
+
+ if (source->offsetRepeatValue != 0) {
+ if (CECount > noChars) {
+ source->offsetRepeatCount += temp.offsetRepeatCount;
+ } else {
+ // **** does this really skip the right offsets? ****
+ source->offsetReturn -= (noChars - CECount);
+ }
+ }
+
freeHeapWritableBuffer(&temp);
+
if (strbuffer != buffer) {
uprv_free(strbuffer);
}
+
+ if (offsetBias >= 0) {
+ source->offsetReturn = source->offsetStore - 1;
+ if (source->offsetReturn == source->offsetBuffer) {
+ source->offsetStore = source->offsetBuffer;
+ }
+ }
+
source->toReturn = source->CEpos - 1;
if (source->toReturn == source->CEs) {
source->CEpos = source->CEs;
}
+
return *(source->toReturn);
+
case LONG_PRIMARY_TAG:
{
*(source->CEpos++) = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON << 8) | UCOL_BYTE_COMMON;
*(source->CEpos++) = ((CE & 0xFF)<<24)|UCOL_CONTINUATION_MARKER;
source->toReturn = source->CEpos - 1;
+
+ if (source->offsetBuffer == NULL) {
+ source->offsetBufferSize = UCOL_EXPAND_CE_BUFFER_SIZE;
+ source->offsetBuffer = (int32_t *) uprv_malloc(sizeof(int32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
+ source->offsetStore = source->offsetBuffer;
+ }
+
+ if (source->flags & UCOL_ITER_INNORMBUF) {
+ source->offsetRepeatCount = 1;
+ } else {
+ int32_t firstOffset = (int32_t)(source->pos - source->string);
+
+ *(source->offsetStore++) = firstOffset;
+ *(source->offsetStore++) = firstOffset + 1;
+
+ source->offsetReturn = source->offsetStore - 1;
+ *(source->offsetBuffer) = firstOffset;
+ if (source->offsetReturn == source->offsetBuffer) {
+ source->offsetStore = source->offsetBuffer;
+ }
+ }
+
+
return *(source->toReturn);
}
+
case EXPANSION_TAG: /* this tag always returns */
+ {
/*
This should handle expansion.
NOTE: we can encounter both continuations and expansions in an expansion!
I have to decide where continuations are going to be dealt with
*/
+ int32_t firstOffset = (int32_t)(source->pos - source->string);
+
+ // **** doesn't work if using iterator ****
+ if (source->offsetReturn != NULL) {
+ if (! (source->flags & UCOL_ITER_INNORMBUF) && source->offsetReturn == source->offsetBuffer) {
+ source->offsetStore = source->offsetBuffer;
+ }else {
+ firstOffset = -1;
+ }
+ }
+
+ if (source->offsetBuffer == NULL) {
+ source->offsetBufferSize = UCOL_EXPAND_CE_BUFFER_SIZE;
+ source->offsetBuffer = (int32_t *) uprv_malloc(sizeof(int32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
+ source->offsetStore = source->offsetBuffer;
+ }
+
/* find the offset to expansion table */
CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
size = getExpansionCount(CE);
@@ -3539,23 +3817,45 @@
if there are less than 16 elements in expansion, we don't terminate
*/
uint32_t count;
+
for (count = 0; count < size; count++) {
*(source->CEpos ++) = *CEOffset++;
+
+ if (firstOffset >= 0) {
+ *(source->offsetStore ++) = firstOffset + 1;
+ }
}
- }
- else {
+ } else {
/* else, we do */
while (*CEOffset != 0) {
*(source->CEpos ++) = *CEOffset ++;
+
+ if (firstOffset >= 0) {
+ *(source->offsetStore ++) = firstOffset + 1;
+ }
}
}
+
+ if (firstOffset >= 0) {
+ source->offsetReturn = source->offsetStore - 1;
+ *(source->offsetBuffer) = firstOffset;
+ if (source->offsetReturn == source->offsetBuffer) {
+ source->offsetStore = source->offsetBuffer;
+ }
+ } else {
+ source->offsetRepeatCount += size - 1;
+ }
+
source->toReturn = source->CEpos - 1;
// in case of one element expansion, we
// want to immediately return CEpos
if(source->toReturn == source->CEs) {
source->CEpos = source->CEs;
}
+
return *(source->toReturn);
+ }
+
case DIGIT_TAG:
{
/*
@@ -3592,7 +3892,7 @@
handle surrogates...
*/
- if (U16_IS_TRAIL (ch)){
+ if (U16_IS_TRAIL (ch)) {
if (!collIter_bos(source)){
UChar lead = getPrevNormalizedChar(source, status);
if(U16_IS_LEAD(lead)) {
@@ -3609,12 +3909,11 @@
}
digVal = u_charDigitValue(char32);
- for(;;){
+ for(;;) {
// Make sure we have enough space.
- if (digIndx >= ((numTempBufSize - 2) * 2) + 1)
- {
+ if (digIndx >= ((numTempBufSize - 2) * 2) + 1) {
numTempBufSize *= 2;
- if (numTempBuf == stackNumTempBuf){
+ if (numTempBuf == stackNumTempBuf) {
numTempBuf = (uint8_t *)uprv_malloc(sizeof(uint8_t) * numTempBufSize);
// Null pointer check
if (numTempBuf == NULL) {
@@ -3622,7 +3921,7 @@
return 0;
}
uprv_memcpy(numTempBuf, stackNumTempBuf, UCOL_MAX_BUFFER);
- }else {
+ } else {
uint8_t *temp = (uint8_t *)uprv_realloc(numTempBuf, numTempBufSize);
if (temp == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
@@ -3637,7 +3936,8 @@
// Skip over trailing zeroes, and keep a count of them.
if (digVal != 0)
nonZeroValReached = TRUE;
- if (nonZeroValReached){
+
+ if (nonZeroValReached) {
/*
We parse the digit string into base 100 numbers (this fits into a byte).
We only add to the buffer in twos, thus if we are parsing an odd character,
@@ -3651,7 +3951,7 @@
ones place and the second digit encountered into the tens place.
*/
- if ((digIndx + trailingZeroCount) % 2 == 1){
+ if ((digIndx + trailingZeroCount) % 2 == 1) {
// High-order digit case (tens place)
collateVal += (uint8_t)(digVal * 10);
@@ -3665,37 +3965,33 @@
numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
collateVal = 0;
- }
- else{
+ } else {
// Low-order digit case (ones place)
collateVal = (uint8_t)digVal;
// Check for leading zeroes.
- if (collateVal == 0)
- {
+ if (collateVal == 0) {
if (!leadingZeroIndex)
leadingZeroIndex = (digIndx/2) + 2;
- }
- else
+ } else
leadingZeroIndex = 0;
// No need to write to buffer; the case of a last odd digit
// is handled below.
}
++digIndx;
- }
- else
+ } else
++trailingZeroCount;
- if (!collIter_bos(source)){
+ if (!collIter_bos(source)) {
ch = getPrevNormalizedChar(source, status);
//goBackOne(source);
- if (U16_IS_TRAIL(ch)){
+ if (U16_IS_TRAIL(ch)) {
backupState(source, &state);
- if (!collIter_bos(source))
- {
+ if (!collIter_bos(source)) {
goBackOne(source);
UChar lead = getPrevNormalizedChar(source, status);
+
if(U16_IS_LEAD(lead)) {
char32 = U16_GET_SUPPLEMENTARY(lead,ch);
} else {
@@ -3703,11 +3999,10 @@
char32 = ch;
}
}
- }
- else
+ } else
char32 = ch;
- if ((digVal = u_charDigitValue(char32)) == -1){
+ if ((digVal = u_charDigitValue(char32)) == -1) {
if (char32 > 0xFFFF) {// For surrogates.
loadState(source, &state, FALSE);
}
@@ -3717,22 +4012,23 @@
//getNextNormalizedChar(source);
break;
}
+
goBackOne(source);
}else
break;
}
- if (nonZeroValReached == FALSE){
+ if (! nonZeroValReached) {
digIndx = 2;
trailingZeroCount = 0;
numTempBuf[2] = 6;
}
- if ((digIndx + trailingZeroCount) % 2 != 0){
+ if ((digIndx + trailingZeroCount) % 2 != 0) {
numTempBuf[((digIndx)/2) + 2] = collateVal*2 + 6;
digIndx += 1; // The implicit leading zero
}
- if (trailingZeroCount % 2 != 0){
+ if (trailingZeroCount % 2 != 0) {
// We had to consume one trailing zero for the low digit
// of the least significant byte
digIndx += 1; // The trailing zero not in the exponent
@@ -3764,8 +4060,7 @@
(UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight
UCOL_BYTE_COMMON; // Tertiary weight.
i = endIndex - 1; // Reset the index into the buffer.
- while(i >= 2)
- {
+ while(i >= 2) {
primWeight = numTempBuf[i--] << 8;
if ( i >= 2)
primWeight |= numTempBuf[i--];
@@ -3776,13 +4071,13 @@
source->toReturn = source->CEpos -1;
return *(source->toReturn);
- }
- else {
+ } else {
CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
CE = *(CEOffset++);
break;
}
}
+
case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/
{
static const uint32_t
@@ -3809,18 +4104,37 @@
V += VBase;
T += TBase;
+ if (source->offsetBuffer == NULL) {
+ source->offsetBufferSize = UCOL_EXPAND_CE_BUFFER_SIZE;
+ source->offsetBuffer = (int32_t *) uprv_malloc(sizeof(int32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
+ source->offsetStore = source->offsetBuffer;
+ }
+
+ int32_t firstOffset = (int32_t)(source->pos - source->string);
+
+ *(source->offsetStore++) = firstOffset;
+
/*
- return the first CE, but first put the rest into the expansion buffer
- */
- if (!source->coll->image->jamoSpecial)
- {
+ * return the first CE, but first put the rest into the expansion buffer
+ */
+ if (!source->coll->image->jamoSpecial) {
*(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, L);
*(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V);
- if (T != TBase)
+ *(source->offsetStore++) = firstOffset + 1;
+
+ if (T != TBase) {
*(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T);
+ *(source->offsetStore++) = firstOffset + 1;
+ }
source->toReturn = source->CEpos - 1;
- return *(source->toReturn);
+
+ source->offsetReturn = source->offsetStore - 1;
+ if (source->offsetReturn == source->offsetBuffer) {
+ source->offsetStore = source->offsetBuffer;
+ }
+
+ return *(source->toReturn);
} else {
// Since Hanguls pass the FCD check, it is
// guaranteed that we won't be in
@@ -3862,18 +4176,46 @@
return(UCOL_IGNORABLE);
}
}
+
case IMPLICIT_TAG: /* everything that is not defined otherwise */
+#if 0
+ if (source->offsetBuffer == NULL) {
+ source->offsetBufferSize = UCOL_EXPAND_CE_BUFFER_SIZE;
+ source->offsetBuffer = (int32_t *) uprv_malloc(sizeof(int32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
+ source->offsetStore = source->offsetBuffer;
+ }
+
+ // **** doesn't work if using iterator ****
+ if (source->flags & UCOL_ITER_INNORMBUF) {
+ source->offsetRepeatCount = 1;
+ } else {
+ int32_t firstOffset = (int32_t)(source->pos - source->string);
+
+ *(source->offsetStore++) = firstOffset;
+ *(source->offsetStore++) = firstOffset + 1;
+
+ source->offsetReturn = source->offsetStore - 1;
+ if (source->offsetReturn == source->offsetBuffer) {
+ source->offsetStore = source->offsetBuffer;
+ }
+ }
+#endif
+
return getPrevImplicit(ch, source);
+
// TODO: Remove CJK implicits as they are handled by the getImplicitPrimary function
case CJK_IMPLICIT_TAG: /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D*/
return getPrevImplicit(ch, source);
+
case SURROGATE_TAG: /* This is a surrogate pair */
/* essentialy an engaged lead surrogate. */
/* if you have encountered it here, it means that a */
/* broken sequence was encountered and this is an error */
return 0;
+
case LEAD_SURROGATE_TAG: /* D800-DBFF*/
return 0; /* broken surrogate sequence */
+
case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/
{
UChar32 cp = 0;
@@ -3897,22 +4239,27 @@
} else {
return 0; /* completely ignorable */
}
+
return getPrevImplicit(cp, source);
}
+
/* UCA is filled with these. Tailorings are NOT_FOUND */
/* not yet implemented */
case CHARSET_TAG: /* this tag always returns */
/* probably after 1.8 */
return UCOL_NOT_FOUND;
+
default: /* this tag always returns */
*status = U_INTERNAL_PROGRAM_ERROR;
CE=0;
break;
}
+
if (CE <= UCOL_NOT_FOUND) {
break;
}
}
+
return CE;
}
Modified: trunk/source/i18n/ucol_imp.h
===================================================================
--- trunk/source/i18n/ucol_imp.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/ucol_imp.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -270,6 +270,12 @@
uint32_t *toReturn; /* This is the CE from CEs buffer that should be returned */
uint32_t *CEpos; /* This is the position to which we have stored processed CEs */
+
+ int32_t *offsetReturn; /* This is the offset to return, if non-NULL */
+ int32_t *offsetStore; /* This is the pointer for storing offsets */
+ int32_t offsetRepeatCount; /* Repeat stored offset if non-zero */
+ int32_t offsetRepeatValue; /* offset value to repeat */
+
UChar *writableBuffer;
uint32_t writableBufSize;
UChar *fcdPosition; /* Position in the original string to continue FCD check from. */
@@ -280,6 +286,10 @@
int32_t extendCEsSize; /* Holds the size of the dynamic CEs buffer */
uint32_t CEs[UCOL_EXPAND_CE_BUFFER_SIZE]; /* This is where we store CEs */
UChar stackWritableBuffer[UCOL_WRITABLE_BUFFER_SIZE]; /* A writable buffer. */
+
+ int32_t *offsetBuffer; /* A dynamic buffer to hold offsets */
+ int32_t offsetBufferSize; /* The size of the offset buffer */
+
UCharIterator *iterator;
/*int32_t iteratorIndex;*/
} collIterate;
@@ -293,6 +303,7 @@
*/
struct collIterateState {
UChar *pos; /* This is position in the string. Can be to original or writable buf */
+ UChar *returnPos;
UChar *fcdPosition; /* Position in the original string to continue FCD check from. */
UChar *bufferaddress; /* address of the normalization buffer */
uint32_t buffersize;
@@ -305,6 +316,8 @@
U_CAPI void U_EXPORT2
uprv_init_collIterate(const UCollator *collator, const UChar *sourceString, int32_t sourceLen, collIterate *s);
+struct UCollationPCE;
+typedef struct UCollationPCE UCollationPCE;
struct UCollationElements
{
@@ -320,9 +333,17 @@
* Indicates if the data should be deleted.
*/
UBool isWritable;
+
+/**
+ * Data for getNextProcessed, getPreviousProcessed.
+ */
+ UCollationPCE *pce;
};
+U_CAPI void U_EXPORT2
+uprv_init_pce(const struct UCollationElements *elems);
+
#define UCOL_LEVELTERMINATOR 1
/* mask off anything but primary order */
Modified: trunk/source/i18n/ucoleitr.cpp
===================================================================
--- trunk/source/i18n/ucoleitr.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/ucoleitr.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -20,6 +20,7 @@
#include "unicode/ucoleitr.h"
#include "unicode/ustring.h"
#include "unicode/sortkey.h"
+#include "unicode/uobject.h"
#include "ucol_imp.h"
#include "cmemory.h"
@@ -27,8 +28,269 @@
#define BUFFER_LENGTH 100
+#define DEFAULT_BUFFER_SIZE 16 /* entries in the defaultBuffer member of RCEBuffer/PCEBuffer */
+#define BUFFER_GROW 8 /* entries added each time a full buffer is reallocated */
+/* Element count of a true array (not a pointer); the argument is parenthesized for safe expansion. */
+#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
+/* Copy count elements from src to dst; the element size is taken from src. */
+#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0])
+/* Allocate room for count objects of type; the whole expansion is parenthesized so it acts as one operand. */
+#define NEW_ARRAY(type, count) ((type *) uprv_malloc((count) * sizeof(type)))
+/* Resize array to newSize elements; yields the untyped uprv_realloc() result. */
+#define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * sizeof (array)[0])
+/* Release memory obtained through NEW_ARRAY or GROW_ARRAY. */
+#define DELETE_ARRAY(array) uprv_free((void *) (array))
+
typedef struct collIterate collIterator;
+struct RCEI
+{
+ uint32_t ce;
+ int32_t low;
+ int32_t high;
+};
+
+struct RCEBuffer
+{
+ RCEI defaultBuffer[DEFAULT_BUFFER_SIZE];
+ RCEI *buffer;
+ int32_t bufferIndex;
+ int32_t bufferSize;
+
+ RCEBuffer();
+ ~RCEBuffer();
+
+ UBool empty() const;
+ void put(uint32_t ce, int32_t ixLow, int32_t ixHigh);
+ const RCEI *get();
+};
+
+RCEBuffer::RCEBuffer()
+{
+ buffer = defaultBuffer;
+ bufferIndex = 0;
+ bufferSize = DEFAULT_BUFFER_SIZE;
+}
+
+RCEBuffer::~RCEBuffer()
+{
+ if (buffer != defaultBuffer) {
+ DELETE_ARRAY(buffer);
+ }
+}
+
+UBool RCEBuffer::empty() const
+{
+ return bufferIndex <= 0;
+}
+
+// Append one (ce, ixLow, ixHigh) entry, growing the buffer by BUFFER_GROW entries when it is full.
+// put() has no error channel, so the entry is dropped if the larger buffer cannot be allocated.
+void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh)
+{
+    if (bufferIndex >= bufferSize) {
+        RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW);
+        if (newBuffer == NULL) {
+            return; // allocation failed: keep the existing buffer intact instead of copying into NULL
+        }
+        ARRAY_COPY(newBuffer, buffer, bufferSize);
+        if (buffer != defaultBuffer) {
+            DELETE_ARRAY(buffer);
+        }
+        buffer = newBuffer;
+        bufferSize += BUFFER_GROW;
+    }
+    buffer[bufferIndex].ce = ce;
+    buffer[bufferIndex].low = ixLow;
+    buffer[bufferIndex].high = ixHigh;
+    bufferIndex += 1;
+}
+
+const RCEI *RCEBuffer::get()
+{
+ if (bufferIndex > 0) {
+ return &buffer[--bufferIndex];
+ }
+
+ return NULL;
+}
+
+struct PCEI
+{
+ uint64_t ce;
+ int32_t low;
+ int32_t high;
+};
+
+struct PCEBuffer
+{
+ PCEI defaultBuffer[DEFAULT_BUFFER_SIZE];
+ PCEI *buffer;
+ int32_t bufferIndex;
+ int32_t bufferSize;
+
+ PCEBuffer();
+ ~PCEBuffer();
+
+ void reset();
+ UBool empty() const;
+ void put(uint64_t ce, int32_t ixLow, int32_t ixHigh);
+ const PCEI *get();
+};
+
+PCEBuffer::PCEBuffer()
+{
+ buffer = defaultBuffer;
+ bufferIndex = 0;
+ bufferSize = DEFAULT_BUFFER_SIZE;
+}
+
+PCEBuffer::~PCEBuffer()
+{
+ if (buffer != defaultBuffer) {
+ DELETE_ARRAY(buffer);
+ }
+}
+
+void PCEBuffer::reset()
+{
+ bufferIndex = 0;
+}
+
+UBool PCEBuffer::empty() const
+{
+ return bufferIndex <= 0;
+}
+
+// Append one (ce, ixLow, ixHigh) entry, growing the buffer by BUFFER_GROW entries when it is full.
+// put() has no error channel, so the entry is dropped if the larger buffer cannot be allocated.
+void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh)
+{
+    if (bufferIndex >= bufferSize) {
+        PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW);
+        if (newBuffer == NULL) {
+            return; // allocation failed: keep the existing buffer intact instead of copying into NULL
+        }
+        ARRAY_COPY(newBuffer, buffer, bufferSize);
+        if (buffer != defaultBuffer) {
+            DELETE_ARRAY(buffer);
+        }
+        buffer = newBuffer;
+        bufferSize += BUFFER_GROW;
+    }
+    buffer[bufferIndex].ce = ce;
+    buffer[bufferIndex].low = ixLow;
+    buffer[bufferIndex].high = ixHigh;
+    bufferIndex += 1;
+}
+
+const PCEI *PCEBuffer::get()
+{
+ if (bufferIndex > 0) {
+ return &buffer[--bufferIndex];
+ }
+
+ return NULL;
+}
+
+/*
+ * This inherits from UObject so that
+ * it can be allocated by new and the
+ * constructor for PCEBuffer is called.
+ */
+struct UCollationPCE : public UObject
+{
+ PCEBuffer pceBuffer;
+ UCollationStrength strength;
+ UBool toShift;
+ UBool isShifted;
+ uint32_t variableTop;
+
+ UCollationPCE(UCollationElements *elems);
+ ~UCollationPCE();
+
+ void init(const UCollator *coll);
+
+ virtual UClassID getDynamicClassID() const;
+ static UClassID getStaticClassID();
+};
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCollationPCE)
+
+UCollationPCE::UCollationPCE(UCollationElements *elems)
+{
+ init(elems->iteratordata_.coll);
+}
+
+void UCollationPCE::init(const UCollator *coll)
+{
+ UErrorCode status = U_ZERO_ERROR;
+
+ strength = ucol_getStrength(coll);
+ toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED;
+ isShifted = FALSE;
+ variableTop = coll->variableTopValue << 16;
+}
+
+UCollationPCE::~UCollationPCE()
+{
+ // nothing to do
+}
+
+// Pack one 32-bit CE into a 64-bit value: primary<<48 | secondary<<32 | tertiary<<16 | quaternary.
+// Only the levels selected by elems->pce->strength are extracted; the rest stay 0.
+// Reads and updates elems->pce->isShifted.
+inline uint64_t processCE(UCollationElements *elems, uint32_t ce)
+{
+    uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
+
+    // This is clean, but somewhat slow...
+    // We could apply the mask to ce and then
+    // just get all three orders...
+    switch(elems->pce->strength) {
+    default:
+        tertiary = ucol_tertiaryOrder(ce);
+        /* note fall-through */
+
+    case UCOL_SECONDARY:
+        secondary = ucol_secondaryOrder(ce);
+        /* note fall-through */
+
+    case UCOL_PRIMARY:
+        primary = ucol_primaryOrder(ce);
+    }
+
+    // Shifted handling: a CE below variableTop with a non-zero primary while toShift is set,
+    // or a zero-primary CE seen while isShifted is still set. (Continuation?)
+    if ((elems->pce->toShift && elems->pce->variableTop > ce && primary != 0)
+            || (elems->pce->isShifted && primary == 0)) {
+        if (primary == 0) {
+            return UCOL_IGNORABLE;
+        }
+
+        if (elems->pce->strength >= UCOL_QUATERNARY) {
+            quaternary = primary;
+        }
+        primary = secondary = tertiary = 0;
+        elems->pce->isShifted = TRUE;
+    } else {
+        if (elems->pce->strength >= UCOL_QUATERNARY) {
+            quaternary = 0xFFFF;
+        }
+        elems->pce->isShifted = FALSE;
+    }
+
+    return (primary << 48) | (secondary << 32) | (tertiary << 16) | quaternary;
+}
+
+U_CAPI void U_EXPORT2
+uprv_init_pce(const UCollationElements *elems)
+{
+ if (elems->pce != NULL) {
+ elems->pce->init(elems->iteratordata_.coll);
+ }
+}
+
/* public methods ---------------------------------------------------- */
U_CAPI UCollationElements* U_EXPORT2
@@ -50,8 +312,9 @@
return NULL;
}
- result->reset_ = TRUE;
- result->isWritable = FALSE;
+ result->reset_ = TRUE;
+ result->isWritable = FALSE;
+ result->pce = NULL;
if (text == NULL) {
textLength = 0;
@@ -64,22 +327,34 @@
U_CAPI void U_EXPORT2
ucol_closeElements(UCollationElements *elems)
{
- if (elems != NULL) {
- collIterate *ci = &elems->iteratordata_;
- if (ci != NULL) {
- if (ci->writableBuffer != ci->stackWritableBuffer) {
- uprv_free(ci->writableBuffer);
- }
- if (ci->extendCEs) {
- uprv_free(ci->extendCEs);
- }
- }
- if (elems->isWritable && elems->iteratordata_.string != NULL)
- {
- uprv_free(elems->iteratordata_.string);
- }
- uprv_free(elems);
- }
+ if (elems != NULL) {
+ collIterate *ci = &elems->iteratordata_;
+
+ if (ci != NULL) {
+ if (ci->writableBuffer != ci->stackWritableBuffer) {
+ uprv_free(ci->writableBuffer);
+ }
+
+ if (ci->extendCEs) {
+ uprv_free(ci->extendCEs);
+ }
+
+ if (ci->offsetBuffer) {
+ uprv_free(ci->offsetBuffer);
+ }
+ }
+
+ if (elems->isWritable && elems->iteratordata_.string != NULL)
+ {
+ uprv_free(elems->iteratordata_.string);
+ }
+
+ if (elems->pce != NULL) {
+ delete elems->pce;
+ }
+
+ uprv_free(elems);
+ }
}
U_CAPI void U_EXPORT2
@@ -103,6 +378,9 @@
ci->writableBufSize = UCOL_WRITABLE_BUFFER_SIZE;
}
ci->fcdPosition = NULL;
+
+ //ci->offsetReturn = ci->offsetStore = NULL;
+ ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
}
U_CAPI int32_t U_EXPORT2
@@ -126,6 +404,52 @@
return result;
}
+U_CAPI int64_t U_EXPORT2
+ucol_nextProcessed(UCollationElements *elems,
+ int32_t *ixLow,
+ int32_t *ixHigh,
+ UErrorCode *status)
+{
+ const UCollator *coll = elems->iteratordata_.coll;
+ int64_t result = UCOL_IGNORABLE;
+ uint32_t low = 0, high = 0;
+
+ if (U_FAILURE(*status)) {
+ return UCOL_PROCESSED_NULLORDER;
+ }
+
+ if (elems->pce == NULL) {
+ elems->pce = new UCollationPCE(elems);
+ } else {
+ elems->pce->pceBuffer.reset();
+ }
+
+ elems->reset_ = FALSE;
+
+ do {
+ low = ucol_getOffset(elems);
+ uint32_t ce = (uint32_t) ucol_getNextCE(coll, &elems->iteratordata_, status);
+ high = ucol_getOffset(elems);
+
+ if (ce == UCOL_NO_MORE_CES) {
+ result = UCOL_PROCESSED_NULLORDER;
+ break;
+ }
+
+ result = processCE(elems, ce);
+ } while (result == UCOL_IGNORABLE);
+
+ if (ixLow != NULL) {
+ *ixLow = low;
+ }
+
+ if (ixHigh != NULL) {
+ *ixHigh = high;
+ }
+
+ return result;
+}
+
U_CAPI int32_t U_EXPORT2
ucol_previous(UCollationElements *elems,
UErrorCode *status)
@@ -161,12 +485,162 @@
}
}
+U_CAPI int64_t U_EXPORT2
+ucol_previousProcessed(UCollationElements *elems,
+ int32_t *ixLow,
+ int32_t *ixHigh,
+ UErrorCode *status)
+{
+ const UCollator *coll = elems->iteratordata_.coll;
+ int64_t result = UCOL_IGNORABLE;
+ // int64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
+ // UCollationStrength strength = ucol_getStrength(coll);
+ // UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED;
+ // uint32_t variableTop = coll->variableTopValue;
+ int32_t low = 0, high = 0;
+
+ if (U_FAILURE(*status)) {
+ return UCOL_PROCESSED_NULLORDER;
+ }
+
+ if (elems->reset_ &&
+ (elems->iteratordata_.pos == elems->iteratordata_.string)) {
+ if (elems->iteratordata_.endp == NULL) {
+ elems->iteratordata_.endp = elems->iteratordata_.string +
+ u_strlen(elems->iteratordata_.string);
+ elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
+ }
+
+ elems->iteratordata_.pos = elems->iteratordata_.endp;
+ elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
+ }
+
+ if (elems->pce == NULL) {
+ elems->pce = new UCollationPCE(elems);
+ } else {
+ //elems->pce->pceBuffer.reset();
+ }
+
+ elems->reset_ = FALSE;
+
+ while (elems->pce->pceBuffer.empty()) {
+ // buffer raw CEs up to non-ignorable primary
+ RCEBuffer rceb;
+ uint32_t ce;
+
+ // **** do we need to reset rceb, or will it always be empty at this point ****
+ do {
+ high = ucol_getOffset(elems);
+ ce = ucol_getPrevCE(coll, &elems->iteratordata_, status);
+ low = ucol_getOffset(elems);
+
+ if (ce == UCOL_NO_MORE_CES) {
+ if (! rceb.empty()) {
+ break;
+ }
+
+ goto finish;
+ }
+
+ rceb.put(ce, low, high);
+ } while ((ce & UCOL_PRIMARYMASK) == 0);
+
+ // process the raw CEs
+ while (! rceb.empty()) {
+ const RCEI *rcei = rceb.get();
+
+ result = processCE(elems, rcei->ce);
+
+ if (result != UCOL_IGNORABLE) {
+ elems->pce->pceBuffer.put(result, rcei->low, rcei->high);
+ }
+ }
+ }
+
+finish:
+ if (elems->pce->pceBuffer.empty()) {
+ // **** Is -1 the right value for ixLow, ixHigh? ****
+ if (ixLow != NULL) {
+ *ixLow = -1;
+ }
+
+ if (ixHigh != NULL) {
+ *ixHigh = -1
+ ;
+ }
+ return UCOL_PROCESSED_NULLORDER;
+ }
+
+ const PCEI *pcei = elems->pce->pceBuffer.get();
+
+ if (ixLow != NULL) {
+ *ixLow = pcei->low;
+ }
+
+ if (ixHigh != NULL) {
+ *ixHigh = pcei->high;
+ }
+
+ return pcei->ce;
+}
+
U_CAPI int32_t U_EXPORT2
ucol_getMaxExpansion(const UCollationElements *elems,
int32_t order)
{
uint8_t result;
+
+#if 0
UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result);
+#else
+ const UCollator *coll = elems->iteratordata_.coll;
+ const uint32_t *start;
+ const uint32_t *limit;
+ const uint32_t *mid;
+ uint32_t strengthMask = 0;
+ uint32_t mOrder = (uint32_t) order;
+
+ switch (coll->strength)
+ {
+ default:
+ strengthMask |= UCOL_TERTIARYORDERMASK;
+ /* fall through */
+
+ case UCOL_SECONDARY:
+ strengthMask |= UCOL_SECONDARYORDERMASK;
+ /* fall through */
+
+ case UCOL_PRIMARY:
+ strengthMask |= UCOL_PRIMARYORDERMASK;
+ }
+
+ mOrder &= strengthMask;
+ start = (coll)->endExpansionCE;
+ limit = (coll)->lastEndExpansionCE;
+
+ while (start < limit - 1) {
+ mid = start + ((limit - start) >> 1);
+ if (mOrder <= (*mid & strengthMask)) {
+ limit = mid;
+ } else {
+ start = mid;
+ }
+ }
+
+ // FIXME: with a masked search, there might be more than one hit,
+ // so we need to look forward and backward from the match to find all
+ // of the hits...
+ if ((*start & strengthMask) == mOrder) {
+ result = *((coll)->expansionCESize + (start - (coll)->endExpansionCE));
+ } else if ((*limit & strengthMask) == mOrder) {
+ result = *(coll->expansionCESize + (limit - coll->endExpansionCE));
+ } else if ((mOrder & 0xFFFF) == 0x00C0) {
+ result = 2;
+ } else {
+ result = 1;
+ }
+#endif
+
return result;
}
@@ -199,21 +673,30 @@
U_CAPI int32_t U_EXPORT2
ucol_getOffset(const UCollationElements *elems)
{
- const collIterate *ci = &(elems->iteratordata_);
- // while processing characters in normalization buffer getOffset will
- // return the next non-normalized character.
- // should be inline with the old implementation since the old codes uses
- // nextDecomp in normalizer which also decomposes the string till the
- // first base character is found.
- if (ci->flags & UCOL_ITER_INNORMBUF) {
- if (ci->fcdPosition == NULL) {
- return 0;
- }
- return (int32_t)(ci->fcdPosition - ci->string);
- }
- else {
- return (int32_t)(ci->pos - ci->string);
- }
+ const collIterate *ci = &(elems->iteratordata_);
+
+ if (ci->offsetRepeatCount > 0 && ci->offsetRepeatValue != 0) {
+ return ci->offsetRepeatValue;
+ }
+
+ if (ci->offsetReturn != NULL) {
+ return *ci->offsetReturn;
+ }
+
+ // while processing characters in normalization buffer getOffset will
+ // return the next non-normalized character.
+ // should be inline with the old implementation since the old codes uses
+ // nextDecomp in normalizer which also decomposes the string till the
+ // first base character is found.
+ if (ci->flags & UCOL_ITER_INNORMBUF) {
+ if (ci->fcdPosition == NULL) {
+ return 0;
+ }
+ return (int32_t)(ci->fcdPosition - ci->string);
+ }
+ else {
+ return (int32_t)(ci->pos - ci->string);
+ }
}
U_CAPI void U_EXPORT2
@@ -239,6 +722,10 @@
}
ci->fcdPosition = NULL;
elems->reset_ = FALSE;
+
+ ci->offsetReturn = NULL;
+ ci->offsetStore = ci->offsetBuffer;
+ ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
}
U_CAPI int32_t U_EXPORT2
Modified: trunk/source/i18n/ucurr.cpp
===================================================================
--- trunk/source/i18n/ucurr.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/ucurr.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -1119,161 +1119,253 @@
return myEnum;
}
+U_CAPI int32_t U_EXPORT2
+ucurr_countCurrencies(const char* locale,
+ UDate date,
+ UErrorCode* ec)
+{
+ int32_t currCount = 0;
+ int32_t resLen = 0;
+ const UChar* s = NULL;
+ if (ec != NULL && U_SUCCESS(*ec))
+ {
+ // local variables
+ UErrorCode localStatus = U_ZERO_ERROR;
+ char id[ULOC_FULLNAME_CAPACITY];
+ resLen = uloc_getKeywordValue(locale, "currency", id, ULOC_FULLNAME_CAPACITY, &localStatus);
-U_CAPI int32_t U_EXPORT2
-ucurr_forLocaleAndDate(const char* locale,
- UDate date,
- UChar* buff,
- int32_t buffCapacity,
+ // get country or country_variant in `id'
+ uint32_t variantType = idForLocale(locale, id, sizeof(id), ec);
+ if (U_FAILURE(*ec))
+ {
+ return 0;
+ }
+
+ // Remove variants, which is only needed for registration.
+ char *idDelim = strchr(id, VAR_DELIM);
+ if (idDelim)
+ {
+ idDelim[0] = 0;
+ }
+
+ // Look up the CurrencyMap element in the root bundle.
+ UResourceBundle *rb = ures_openDirect(NULL, CURRENCY_DATA, &localStatus);
+ UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
+
+ // Using the id derived from the local, get the currency data
+ UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus);
+
+ // process each currency to see which one is valid for the given date
+ if (U_SUCCESS(localStatus))
+ {
+ for (int32_t i=0; i<ures_getSize(countryArray); i++)
+ {
+ // get the currency resource
+ UResourceBundle *currencyRes = ures_getByIndex(countryArray, i, NULL, &localStatus);
+ s = ures_getStringByKey(currencyRes, "id", &resLen, &localStatus);
+
+ // get the from date
+ int32_t fromLength = 0;
+ UResourceBundle *fromRes = ures_getByKey(currencyRes, "from", NULL, &localStatus);
+ const int32_t *fromArray = ures_getIntVector(fromRes, &fromLength, &localStatus);
+
+ int64_t currDate64 = (int64_t)fromArray[0] << 32;
+ currDate64 |= ((int64_t)fromArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF));
+ UDate fromDate = (UDate)currDate64;
+
+ if (ures_getSize(currencyRes) > 2)
+ {
+ int32_t toLength = 0;
+ UResourceBundle *toRes = ures_getByKey(currencyRes, "to", NULL, &localStatus);
+ const int32_t *toArray = ures_getIntVector(toRes, &toLength, &localStatus);
+
+ currDate64 = (int64_t)toArray[0] << 32;
+ currDate64 |= ((int64_t)toArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF));
+ UDate toDate = (UDate)currDate64;
+
+ if ((fromDate <= date) && (date < toDate))
+ {
+ currCount++;
+ }
+
+ ures_close(toRes);
+ }
+ else
+ {
+ if (fromDate <= date)
+ {
+ currCount++;
+ }
+ }
+
+ // close open resources
+ ures_close(currencyRes);
+ ures_close(fromRes);
+
+ } // end For loop
+ } // end if (U_SUCCESS(localStatus))
+
+ ures_close(countryArray);
+
+ // Check for errors
+ if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR)
+ {
+ // There is nothing to fallback to.
+ // Report the failure/warning if possible.
+ *ec = localStatus;
+ }
+
+ if (U_SUCCESS(*ec))
+ {
+ // no errors
+ return currCount;
+ }
+
+ }
+
+ // If we got here, either error code is invalid or
+ // some argument passed is no good.
+ return 0;
+}
+
+U_CAPI int32_t U_EXPORT2
+ucurr_forLocaleAndDate(const char* locale,
+ UDate date,
+ int32_t index,
+ UChar* buff,
+ int32_t buffCapacity,
UErrorCode* ec)
{
int32_t resLen = 0;
+ int32_t currIndex = 0;
const UChar* s = NULL;
if (ec != NULL && U_SUCCESS(*ec))
{
// check the arguments passed
- if ((buff && buffCapacity) || !buffCapacity)
+ if ((buff && buffCapacity) || !buffCapacity )
{
// local variables
UErrorCode localStatus = U_ZERO_ERROR;
char id[ULOC_FULLNAME_CAPACITY];
+ resLen = uloc_getKeywordValue(locale, "currency", id, ULOC_FULLNAME_CAPACITY, &localStatus);
- if ((resLen = uloc_getKeywordValue(locale, "currency", id, ULOC_FULLNAME_CAPACITY, &localStatus)))
+ // get country or country_variant in `id'
+ uint32_t variantType = idForLocale(locale, id, sizeof(id), ec);
+ if (U_FAILURE(*ec))
{
- // there is a currency keyword. Try to see if it's valid
- if(buffCapacity > resLen)
- {
- u_charsToUChars(id, buff, resLen);
- }
- }
- else
+ return 0;
+ }
+
+ // Remove variants, which is only needed for registration.
+ char *idDelim = strchr(id, VAR_DELIM);
+ if (idDelim)
{
- // get country or country_variant in `id'
- uint32_t variantType = idForLocale(locale, id, sizeof(id), ec);
- if (U_FAILURE(*ec))
- {
- return 0;
- }
+ idDelim[0] = 0;
+ }
+
+ // Look up the CurrencyMap element in the root bundle.
+ UResourceBundle *rb = ures_openDirect(NULL, CURRENCY_DATA, &localStatus);
+ UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
-#if !UCONFIG_NO_SERVICE
- const UChar* result = CReg::get(id);
- if (result) {
- if(buffCapacity > u_strlen(result)) {
- u_strcpy(buff, result);
- }
- return u_strlen(result);
- }
-#endif
+ // Using the id derived from the local, get the currency data
+ UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus);
- // Remove variants, which is only needed for registration.
- char *idDelim = strchr(id, VAR_DELIM);
- if (idDelim)
+ // process each currency to see which one is valid for the given date
+ bool matchFound = false;
+ if (U_SUCCESS(localStatus))
+ {
+ if ((index <= 0) || (index > ures_getSize(countryArray)))
{
- idDelim[0] = 0;
+ // requested index is out of bounds
+ ures_close(countryArray);
+ return 0;
}
-
- // Look up the CurrencyMap element in the root bundle.
- UResourceBundle *rb = ures_openDirect(NULL, CURRENCY_DATA, &localStatus);
- UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
- // Using the id derived from the local, get the currency data
- UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus);
-
- // process each currency to see which one is valid for the given date
- bool matchFound = false;
- if (U_SUCCESS(localStatus))
+ for (int32_t i=0; i<ures_getSize(countryArray); i++)
{
- for (int32_t i=0; i<ures_getSize(countryArray); i++)
- {
- // get the currency resource
- UResourceBundle *currencyRes = ures_getByIndex(countryArray, i, NULL, &localStatus);
- s = ures_getStringByKey(currencyRes, "id", &resLen, &localStatus);
+ // get the currency resource
+ UResourceBundle *currencyRes = ures_getByIndex(countryArray, i, NULL, &localStatus);
+ s = ures_getStringByKey(currencyRes, "id", &resLen, &localStatus);
- // get the from date
- int32_t fromLength = 0;
- UResourceBundle *fromRes = ures_getByKey(currencyRes, "from", NULL, &localStatus);
- const int32_t *fromArray = ures_getIntVector(fromRes, &fromLength, &localStatus);
+ // get the from date
+ int32_t fromLength = 0;
+ UResourceBundle *fromRes = ures_getByKey(currencyRes, "from", NULL, &localStatus);
+ const int32_t *fromArray = ures_getIntVector(fromRes, &fromLength, &localStatus);
- int64_t currDate64 = (int64_t)fromArray[0] << 32;
- currDate64 |= ((int64_t)fromArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF));
- UDate fromDate = (UDate)currDate64;
+ int64_t currDate64 = (int64_t)fromArray[0] << 32;
+ currDate64 |= ((int64_t)fromArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF));
+ UDate fromDate = (UDate)currDate64;
- if (ures_getSize(currencyRes) > 2)
- {
- int32_t toLength = 0;
- UResourceBundle *toRes = ures_getByKey(currencyRes, "to", NULL, &localStatus);
- const int32_t *toArray = ures_getIntVector(toRes, &toLength, &localStatus);
+ if (ures_getSize(currencyRes) > 2)
+ {
+ int32_t toLength = 0;
+ UResourceBundle *toRes = ures_getByKey(currencyRes, "to", NULL, &localStatus);
+ const int32_t *toArray = ures_getIntVector(toRes, &toLength, &localStatus);
- currDate64 = (int64_t)toArray[0] << 32;
- currDate64 |= ((int64_t)toArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF));
- UDate toDate = (UDate)currDate64;
+ currDate64 = (int64_t)toArray[0] << 32;
+ currDate64 |= ((int64_t)toArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF));
+ UDate toDate = (UDate)currDate64;
- if ((fromDate <= date) && (date < toDate))
+ if ((fromDate <= date) && (date < toDate))
+ {
+ currIndex++;
+ if (currIndex == index)
{
- matchFound = true;
+ matchFound = true;
}
+ }
- ures_close(toRes);
- }
- else
+ ures_close(toRes);
+ }
+ else
+ {
+ if (fromDate <= date)
{
- if (fromDate <= date)
+ currIndex++;
+ if (currIndex == index)
{
- matchFound = true;
+ matchFound = true;
}
}
+ }
- // close open resources
- ures_close(currencyRes);
- ures_close(fromRes);
+ // close open resources
+ ures_close(currencyRes);
+ ures_close(fromRes);
- // check for loop exit
- if (matchFound)
- {
- break;
- }
+ // check for loop exit
+ if (matchFound)
+ {
+ break;
+ }
- } // end For loop
- }
+ } // end For loop
+ }
- ures_close(countryArray);
+ ures_close(countryArray);
- // Due to gaps in the windows of time for valid currencies,
- // it is possible that no currency is valid for the given time.
- // In such a case, use ucurr_forLocale to get a default value.
- if (!matchFound)
- {
- return ucurr_forLocale(locale, buff, buffCapacity, ec);
- }
+ // Check for errors
+ if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR)
+ {
+ // There is nothing to fallback to.
+ // Report the failure/warning if possible.
+ *ec = localStatus;
+ }
- // Check for errors
- if ((U_FAILURE(localStatus)) && strchr(id, '_') != 0)
+ if (U_SUCCESS(*ec))
+ {
+ // no errors
+ if((buffCapacity > resLen) && matchFound)
{
- // We don't know about it.
- // Check to see if we support the variant.
- uloc_getParent(locale, id, sizeof(id), ec);
- *ec = U_USING_FALLBACK_WARNING;
- return ucurr_forLocaleAndDate(id, date, buff, buffCapacity, ec);
+ // write out the currency value
+ u_strcpy(buff, s);
}
- else if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR)
+ else
{
- // There is nothing to fallback to.
- // Report the failure/warning if possible.
- *ec = localStatus;
+ return 0;
}
-
- if (U_SUCCESS(*ec))
- {
- // no errors
- if(buffCapacity > resLen)
- {
- // write out the currency value
- u_strcpy(buff, s);
- }
- }
-
}
// return null terminated currency string
Modified: trunk/source/i18n/unicode/coleitr.h
===================================================================
--- trunk/source/i18n/unicode/coleitr.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/unicode/coleitr.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,11 +1,16 @@
/*
******************************************************************************
- * Copyright (C) 1997-2005, International Business Machines
+ * Copyright (C) 1997-2008, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
*/
/**
+ * \file
+ * \brief C++ API: Collation Element Iterator.
+ */
+
+/**
* File coleitr.h
*
*
@@ -29,10 +34,6 @@
#include "unicode/utypes.h"
-/**
- * \file
- * \brief C++ API: Collation Element Iterator.
- */
#if !UCONFIG_NO_COLLATION
Modified: trunk/source/i18n/unicode/coll.h
===================================================================
--- trunk/source/i18n/unicode/coll.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/unicode/coll.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,11 +1,16 @@
/*
******************************************************************************
-* Copyright (C) 1996-2007, International Business Machines *
+* Copyright (C) 1996-2008, International Business Machines *
* Corporation and others. All Rights Reserved. *
******************************************************************************
*/
/**
+ * \file
+ * \brief C++ API: Collation Service.
+ */
+
+/**
* File coll.h
*
* Created by: Helena Shih
@@ -46,11 +51,6 @@
#include "unicode/utypes.h"
-/**
- * \file
- * \brief C++ API: Collation Service.
- */
-
#if !UCONFIG_NO_COLLATION
#include "unicode/uobject.h"
Modified: trunk/source/i18n/unicode/datefmt.h
===================================================================
--- trunk/source/i18n/unicode/datefmt.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/unicode/datefmt.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
/*
********************************************************************************
- * Copyright (C) 1997-2007, International Business Machines
+ * Copyright (C) 1997-2008, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************************
*
@@ -414,6 +414,29 @@
static DateFormat* U_EXPORT2 createInstance(void);
/**
+ * Create a date/time formatter from skeleton and a given locale.
+ *
+ * Users are encouraged to use the skeleton macros defined in udat.h.
+ * For example, MONTH_WEEKDAY_DAY, which is "MMMMEEEEd",
+ * and which means the pattern should have day, month, and day-of-week
+ * fields, and follow the long date format defined in date time pattern.
+ * For example, for English, the full pattern should be
+ * "EEEE, MMMM d".
+ *
+ * Temporarily, this is an internal API, used by DateIntervalFormat only.
+ * There will be a new set of APIs for the same purpose coming soon.
+ * After which, this API will be replaced.
+ *
+ * @param skeleton the skeleton on which the date format is based.
+ * @param locale the given locale.
+ * @return a simple date formatter which the caller owns.
+ * @internal ICU 4.0
+ */
+ static DateFormat* U_EXPORT2 createPatternInstance(
+ const UnicodeString& skeleton,
+ const Locale& locale);
+
+ /**
* Creates a time formatter with the given formatting style for the given
* locale.
*
Added: trunk/source/i18n/unicode/dtitvfmt.h
===================================================================
--- trunk/source/i18n/unicode/dtitvfmt.h (rev 0)
+++ trunk/source/i18n/unicode/dtitvfmt.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -0,0 +1,1039 @@
+/********************************************************************************
+* Copyright (C) 2008, International Business Machines Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* File DTITVFMT.H
+*
+*******************************************************************************
+*/
+
+#ifndef DTITVFMT_H__
+#define DTITVFMT_H__
+
+
+/**
+ * \file
+ * \brief C++ API: Format and parse date interval in a language-independent manner.
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/utypes.h"
+#include "unicode/ucal.h"
+#include "unicode/smpdtfmt.h"
+#include "unicode/dtintrv.h"
+#include "unicode/dtitvinf.h"
+
+U_NAMESPACE_BEGIN
+
+
+
+/**
+ * DateIntervalFormat is a class for formatting and parsing date
+ * intervals in a language-independent manner.
+ *
+ * <P>
+ * Date interval means from one date to another date,
+ * for example, from "Jan 11, 2008" to "Jan 18, 2008".
+ * We introduced class DateInterval to represent it.
+ * DateInterval is a pair of UDate, which is
+ * the standard milliseconds since 00:00 GMT, Jan 1, 1970.
+ *
+ * <P>
+ * DateIntervalFormat formats a DateInterval into
+ * text as compactly as possible.
+ * For example, the date interval format from "Jan 11, 2008" to "Jan 18, 2008"
+ * is "Jan 11-18, 2008" for English.
+ * And it parses text into DateInterval,
+ * although initially, parsing is not supported.
+ *
+ * <P>
+ * There is no structural information in date time patterns.
+ * For any punctuations and string literals inside a date time pattern,
+ * we do not know whether it is just a separator, or a prefix, or a suffix.
+ * Without such information, it is difficult to generate a sub-pattern
+ * (or super-pattern) by algorithm.
+ * So, formatting a DateInterval is pattern-driven. It is very
+ * similar to formatting in SimpleDateFormat.
+ * We introduce class DateIntervalInfo to save date interval
+ * patterns, similar to date time pattern in SimpleDateFormat.
+ *
+ * <P>
+ * Logically, the interval patterns are mappings
+ * from (skeleton, the_largest_different_calendar_field)
+ * to (date_interval_pattern).
+ *
+ * <P>
+ * A skeleton
+ * <ol>
+ * <li>
+ * only keeps the field pattern letter and ignores all other parts
+ * in a pattern, such as space, punctuations, and string literals.
+ * <li>
+ * hides the order of fields.
+ * <li>
+ * might hide a field's pattern letter length.
+ *
+ * For those non-digit calendar fields, the pattern letter length is
+ * important, such as MMM, MMMM, and MMMMM; EEE and EEEE,
+ * and the field's pattern letter length is honored.
+ *
+ * For the digit calendar fields, such as M or MM, d or dd, yy or yyyy,
+ * the field pattern length is ignored and the best match, which is defined
+ * in date time patterns, will be returned without honoring the field pattern
+ * letter length in the skeleton.
+ * </ol>
+ *
+ * <P>
+ * The calendar fields we support for interval formatting are:
+ * year, month, date, day-of-week, am-pm, hour, hour-of-day, and minute.
+ * Those calendar fields can be defined in the following order:
+ * year > month > date > hour (in day) > minute
+ *
+ * The largest different calendar fields between 2 calendars is the
+ * first different calendar field in above order.
+ *
+ * For example: the largest different calendar fields between "Jan 10, 2007"
+ * and "Feb 20, 2008" is year.
+ *
+ * <P>
+ * There is a set of pre-defined static skeleton strings.
+ * There are pre-defined interval patterns for those pre-defined skeletons
+ * in locales' resource files.
+ * For example, for a skeleton UDAT_YEAR_ABBR_MONTH_DAY, which is "yMMMd",
+ * in en_US, if the largest different calendar field between date1 and date2
+ * is "year", the date interval pattern is "MMM d, yyyy - MMM d, yyyy",
+ * such as "Jan 10, 2007 - Jan 10, 2008".
+ * If the largest different calendar field between date1 and date2 is "month",
+ * the date interval pattern is "MMM d - MMM d, yyyy",
+ * such as "Jan 10 - Feb 10, 2007".
+ * If the largest different calendar field between date1 and date2 is "day",
+ * the date interval pattern is "MMM d-d, yyyy", such as "Jan 10-20, 2007".
+ *
+ * For date skeleton, the interval patterns when year, or month, or date is
+ * different are defined in resource files.
+ * For time skeleton, the interval patterns when am/pm, or hour, or minute is
+ * different are defined in resource files.
+ *
+ * <P>
+ * If a skeleton is not found in a locale's DateIntervalInfo, which means
+ * the interval patterns for the skeleton are not defined in the resource file,
+ * the interval pattern falls back to the interval "fallback" pattern
+ * defined in the resource file.
+ * If the interval "fallback" pattern is not defined, the default fall-back
+ * is "{date0} - {date1}".
+ *
+ * <P>
+ * For the combination of date and time,
+ * the rules to generate interval patterns are:
+ * <ul>
+ * <li>
+ * 1) when the year, month, or day differs, fall back to the fall-back
+ * interval pattern, which is mostly the concatenation of the two original
+ * expressions with a separator between them.
+ * For example, interval pattern from "Jan 10, 2007 10:10 am"
+ * to "Jan 11, 2007 10:10am" is
+ * "Jan 10, 2007 10:10 am - Jan 11, 2007 10:10am"
+ * <li>
+ * 2) otherwise, present the date followed by the range expression
+ * for the time.
+ * For example, interval pattern from "Jan 10, 2007 10:10 am"
+ * to "Jan 10, 2007 11:10am" is "Jan 10, 2007 10:10 am - 11:10am"
+ * </ul>
+ *
+ *
+ * <P>
+ * If two dates are the same, the interval pattern is the single date pattern.
+ * For example, interval pattern from "Jan 10, 2007" to "Jan 10, 2007" is
+ * "Jan 10, 2007".
+ *
+ * Or if the presenting fields between 2 dates have the exact same values,
+ * the interval pattern is the single date pattern.
+ * For example, if user only requests year and month,
+ * the interval pattern from "Jan 10, 2007" to "Jan 20, 2007" is "Jan 2007".
+ *
+ * <P>
+ * DateIntervalFormat needs the following information for correct
+ * formatting: time zone, calendar type, pattern, date format symbols,
+ * and date interval patterns.
+ * It can be instantiated in several ways:
+ * <ul>
+ * <li>
+ * 1. create an instance using default or given locale plus given skeleton.
+ * Users are encouraged to create date interval formatters this way and
+ * to use the pre-defined skeleton macros, such as
+ * UDAT_YEAR_NUM_MONTH, which consist of the calendar fields and
+ * the format style.
+ * <li> 2. create an instance using default or given locale plus given skeleton
+ * plus a given DateIntervalInfo.
+ * This factory method is for power users who want to provide their own
+ * interval patterns.
+ * Locale provides the timezone, calendar, and format symbols information.
+ * Locale plus skeleton provides full pattern information.
+ * DateIntervalInfo provides the date interval patterns.
+ * </ul>
+ *
+ * <P>
+ * For the calendar field pattern letter, such as G, y, M, d, a, h, H, m, s etc.
+ * DateIntervalFormat uses the same syntax as that of
+ * DateTime format.
+ *
+ * <P>
+ * Code Sample: general usage
+ * <pre>
+ * \code
+ * // the date interval object which the DateIntervalFormat formats on
+ * // and parses into
+ * DateInterval* dtInterval = new DateInterval(1000*3600*24, 1000*3600*24*2);
+ * UErrorCode status = U_ZERO_ERROR;
+ * DateIntervalFormat* dtIntervalFmt = DateIntervalFormat::createInstance(
+ * UDAT_YEAR_MONTH_DAY,
+ * Locale("en", "GB", ""), status);
+ * UnicodeString dateIntervalString;
+ * FieldPosition pos = 0;
+ * // formatting
+ * dtIntervalFmt->format(dtInterval, dateIntervalString, pos, status);
+ * delete dtIntervalFmt;
+ * \endcode
+ * </pre>
+ */
+
+class U_I18N_API DateIntervalFormat : public Format {
+public:
+
+ /**
+ * Construct a DateIntervalFormat from skeleton and the default locale.
+ *
+ * This is a convenient override of
+ * createInstance(const UnicodeString& skeleton, const Locale& locale,
+ * UErrorCode&)
+ * with the value of locale as default locale.
+ *
+ * @param skeleton the skeleton on which the interval format is based.
+ * @param status output param set to success/failure code on exit
+ * @return a date time interval formatter which the caller owns.
+ * @draft ICU 4.0
+ */
+ static DateIntervalFormat* U_EXPORT2 createInstance(
+ const UnicodeString& skeleton,
+ UErrorCode& status);
+
+ /**
+ * Construct a DateIntervalFormat from skeleton and a given locale.
+ *
+ * There are pre-defined skeletons (defined in udat.h) having predefined
+ * interval patterns in resource files.
+ * Users are encouraged to use those macros.
+ * For example:
+ * DateIntervalFormat::createInstance(UDAT_MONTH_DAY, status)
+ *
+ * The given Locale provides the interval patterns.
+ * For example, for en_GB, if skeleton is UDAT_YEAR_ABBR_MONTH_WEEKDAY_DAY,
+ * which is "yMMMEEEd",
+ * the interval patterns defined in resource file to above skeleton are:
+ * "EEE, d MMM, yyyy - EEE, d MMM, yyyy" for year differs,
+ * "EEE, d MMM - EEE, d MMM, yyyy" for month differs,
+ * "EEE, d - EEE, d MMM, yyyy" for day differs,
+ * @param skeleton the skeleton on which the interval format is based.
+ * @param locale the given locale
+ * @param status output param set to success/failure code on exit
+ * @return a date time interval formatter which the caller owns.
+ * @draft ICU 4.0
+ */
+
+ static DateIntervalFormat* U_EXPORT2 createInstance(
+ const UnicodeString& skeleton,
+ const Locale& locale,
+ UErrorCode& status);
+
+ /**
+ * Construct a DateIntervalFormat from skeleton,
+ * DateIntervalInfo, and default locale.
+ *
+ * This is a convenient override of
+ * createInstance(const UnicodeString& skeleton, const Locale& locale,
+ * DateIntervalInfo* dtitvinf, UErrorCode&)
+ * with the locale value as default locale.
+ *
+ * Note: the DateIntervalFormat takes ownership of
+ * DateIntervalInfo objects.
+ * Caller should not delete them.
+ *
+ * @param skeleton the skeleton on which the interval format is based.
+ * @param dtitvinf the DateIntervalInfo object to be adopted.
+ * @param status output param set to success/failure code on exit
+ * @return a date time interval formatter which the caller owns.
+ * @draft ICU 4.0
+ */
+ static DateIntervalFormat* U_EXPORT2 createInstance(
+ const UnicodeString& skeleton,
+ DateIntervalInfo* dtitvinf,
+ UErrorCode& status);
+
+ /**
+ * Construct a DateIntervalFormat from skeleton,
+ * a DateIntervalInfo, and the given locale.
+ *
+ * There are pre-defined skeletons (defined in udat.h) having predefined
+ * interval patterns in resource files.
+ * Users are encouraged to use those macros.
+ * For example:
+ * DateIntervalFormat::createInstance(UDAT_MONTH_DAY, status)
+ *
+ * the DateIntervalInfo provides the interval patterns.
+ *
+ * Users are encouraged to set a default interval pattern in DateIntervalInfo
+ * as well, if they want to set other interval patterns (instead of
+ * reading the interval patterns from resource files).
+ * When the corresponding interval pattern for the largest different calendar
+ * field is not found (if the user has not set it), the interval format falls
+ * back to the default interval pattern.
+ * If the user does not provide a default interval pattern, it falls back to
+ * "{date0} - {date1}"
+ *
+ * Note: the DateIntervalFormat takes ownership of
+ * DateIntervalInfo objects.
+ * Caller should not delete them.
+ *
+ * @param skeleton the skeleton on which the interval format is based.
+ * @param locale the given locale
+ * @param dtitvinf the DateIntervalInfo object to be adopted.
+ * @param status output param set to success/failure code on exit
+ * @return a date time interval formatter which the caller owns.
+ * @draft ICU 4.0
+ */
+ static DateIntervalFormat* U_EXPORT2 createInstance(
+ const UnicodeString& skeleton,
+ const Locale& locale,
+ DateIntervalInfo* dtitvinf,
+ UErrorCode& status);
+
+ /**
+ * Destructor.
+ * @draft ICU 4.0
+ */
+ virtual ~DateIntervalFormat();
+
+ /**
+ * Clone this Format object polymorphically. The caller owns the result and
+ * should delete it when done.
+ * @return A copy of the object.
+ * @draft ICU 4.0
+ */
+ virtual Format* clone(void) const;
+
+ /**
+ * Return true if the given Format objects are semantically equal. Objects
+ * of different subclasses are considered unequal.
+ * @param other the object to be compared with.
+ * @return true if the given Format objects are semantically equal.
+ * @draft ICU 4.0
+ */
+ virtual UBool operator==(const Format& other) const;
+
+ /**
+ * Return true if the given Format objects are not semantically equal.
+ * Objects of different subclasses are considered unequal.
+ * @param other the object to be compared with.
+ * @return true if the given Format objects are not semantically equal.
+ * @draft ICU 4.0
+ */
+ UBool operator!=(const Format& other) const;
+
+ /**
+ * Format an object to produce a string. This method handles Formattable
+ * objects with a DateInterval type.
+ * If the Formattable object type is not a DateInterval,
+ * then it returns a failing UErrorCode.
+ *
+ * @param obj The object to format.
+ * Must be a DateInterval.
+ * @param appendTo Output parameter to receive result.
+ * Result is appended to existing contents.
+ * @param fieldPosition On input: an alignment field, if desired.
+ * On output: the offsets of the alignment field.
+ * @param status Output param filled with success/failure status.
+ * @return Reference to 'appendTo' parameter.
+ * @draft ICU 4.0
+ */
+ virtual UnicodeString& format(const Formattable& obj,
+ UnicodeString& appendTo,
+ FieldPosition& fieldPosition,
+ UErrorCode& status) const ;
+
+
+
+ /**
+ * Format a DateInterval to produce a string.
+ *
+ * @param dtInterval DateInterval to be formatted.
+ * @param appendTo Output parameter to receive result.
+ * Result is appended to existing contents.
+ * @param fieldPosition On input: an alignment field, if desired.
+ * On output: the offsets of the alignment field.
+ * @param status Output param filled with success/failure status.
+ * @return Reference to 'appendTo' parameter.
+ * @draft ICU 4.0
+ */
+ UnicodeString& format(const DateInterval* dtInterval,
+ UnicodeString& appendTo,
+ FieldPosition& fieldPosition,
+ UErrorCode& status) const ;
+
+
+ /**
+ * Format 2 Calendars to produce a string.
+ *
+ * Note: "fromCalendar" and "toCalendar" are not const,
+ * since calendar is not const in SimpleDateFormat::format(Calendar&),
+ *
+ * @param fromCalendar calendar set to the from date in date interval
+ * to be formatted into date interval string
+ * @param toCalendar calendar set to the to date in date interval
+ * to be formatted into date interval string
+ * @param appendTo Output parameter to receive result.
+ * Result is appended to existing contents.
+ * @param fieldPosition On input: an alignment field, if desired.
+ * On output: the offsets of the alignment field.
+ * @param status Output param filled with success/failure status.
+ * Caller needs to make sure it is SUCCESS
+ * at the function entrance
+ * @return Reference to 'appendTo' parameter.
+ * @draft ICU 4.0
+ */
+ UnicodeString& format(Calendar& fromCalendar,
+ Calendar& toCalendar,
+ UnicodeString& appendTo,
+ FieldPosition& fieldPosition,
+ UErrorCode& status) const ;
+
+ /**
+ * Parse a string to produce an object. This methods handles parsing of
+ * date time interval strings into Formattable objects with
+ * DateInterval type, which is a pair of UDate.
+ * <P>
+ * In ICU 4.0, parsing of date intervals is not supported.
+ * <P>
+ * Before calling, set parse_pos.index to the offset you want to start
+ * parsing at in the source. After calling, parse_pos.index is the end of
+ * the text you parsed. If error occurs, index is unchanged.
+ * <P>
+ * When parsing, leading whitespace is discarded (with a successful parse),
+ * while trailing whitespace is left as is.
+ * <P>
+ * See Format::parseObject() for more.
+ *
+ * @param source The string to be parsed into an object.
+ * @param result Formattable to be set to the parse result.
+ * If parse fails, return contents are undefined.
+ * @param parse_pos The position to start parsing at. Upon return
+ * this param is set to the position after the
+ * last character successfully parsed. If the
+ * source is not parsed successfully, this param
+ * will remain unchanged.
+ * (This method returns void: the parsed value is delivered
+ * through the "result" parameter, so there is nothing for
+ * the caller to delete.)
+ * @draft ICU 4.0
+ */
+ virtual void parseObject(const UnicodeString& source,
+ Formattable& result,
+ ParsePosition& parse_pos) const;
+
+
+ /**
+ * Gets the date time interval patterns.
+ * @return a pointer to the date time interval patterns associated with
+ * this date interval formatter (the formatter's own object, not a copy).
+ * @draft ICU 4.0
+ */
+ const DateIntervalInfo* getDateIntervalInfo(void) const;
+
+
+ /**
+ * Set the date time interval patterns.
+ * @param newIntervalPatterns the given interval patterns to copy.
+ * @param status output param set to success/failure code on exit
+ * @draft ICU 4.0
+ */
+ void setDateIntervalInfo(const DateIntervalInfo& newIntervalPatterns,
+ UErrorCode& status);
+
+ /**
+ * Set the date time interval patterns.
+ * The caller no longer owns the DateIntervalInfo object and
+ * should not delete it after making this call.
+ * @param newIntervalPatterns the given interval patterns to adopt.
+ * @param status output param set to success/failure code on exit
+ * @draft ICU 4.0
+ */
+ void adoptDateIntervalInfo(DateIntervalInfo* newIntervalPatterns,
+ UErrorCode& status);
+
+
+ /**
+ * Gets the date formatter
+ * @return a pointer to the date formatter associated with
+ * this date interval formatter (the formatter's own object, not a copy).
+ * @draft ICU 4.0
+ */
+ const DateFormat* getDateFormat(void) const;
+
+
+ /**
+ * Set the date formatter.
+ * @param newDateFormat the given date formatter to copy.
+ * caller needs to make sure that
+ * it is a SimpleDateFormatter.
+ * @param status Output param set to success/failure code.
+ * caller needs to make sure it is SUCCESS
+ * at the function entrance.
+ * @draft ICU 4.0
+ */
+ void setDateFormat(const DateFormat& newDateFormat, UErrorCode& status);
+
+ /**
+ * Set the date formatter.
+ * The caller no longer owns the DateFormat object and
+ * should not delete it after making this call.
+ * @param newDateFormat the given date formatter to adopt.
+ * caller needs to make sure that
+ * it is a SimpleDateFormatter.
+ * @param status Output param set to success/failure code.
+ * caller needs to make sure it is SUCCESS
+ * at the function entrance.
+ * @draft ICU 4.0
+ */
+ void adoptDateFormat(DateFormat* newDateFormat, UErrorCode& status);
+
+
+ /**
+ * Return the class ID for this class. This is useful only for comparing to
+ * a return value from getDynamicClassID(). For example:
+ * <pre>
+ * . Base* polymorphic_pointer = createPolymorphicObject();
+ * . if (polymorphic_pointer->getDynamicClassID() ==
+ * . Derived::getStaticClassID()) ...
+ * </pre>
+ * @return The class ID for all objects of this class.
+ * @draft ICU 4.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+ /**
+ * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
+ * method is to implement a simple version of RTTI, since not all C++
+ * compilers support genuine RTTI. Polymorphic operator==() and clone()
+ * methods call this method.
+ *
+ * @return The class ID for this object. All objects of a
+ * given class have the same class ID. Objects of
+ * other classes have different class IDs.
+ * @draft ICU 4.0
+ */
+ virtual UClassID getDynamicClassID(void) const;
+
+protected:
+
+ /**
+ * Copy constructor.
+ * @draft ICU 4.0
+ */
+ DateIntervalFormat(const DateIntervalFormat&);
+
+ /**
+ * Assignment operator.
+ * @draft ICU 4.0
+ */
+ DateIntervalFormat& operator=(const DateIntervalFormat&);
+
+private:
+
+ /**
+ * Save the interval pattern information.
+ * Interval pattern consists of 2 single date patterns and the separator.
+ * For example, interval pattern "MMM d - MMM d, yyyy" consists
+ * a single date pattern "MMM d", another single date pattern "MMM d, yyyy",
+ * and a separator "-".
+ * The pattern is divided into 2 parts. For above example,
+ * the first part is "MMM d - ", and the second part is "MMM d, yyyy".
+ * Also, the first date appears in an interval pattern could be
+ * the earlier date or the later date.
+ * And such information is saved in the interval pattern as well.
+ * FIXME: do I need to define an inner class
+ */
+ typedef struct PatternInfo {
+ // Leading portion of the split interval pattern,
+ // e.g. "MMM d - " for the interval pattern "MMM d - MMM d, yyyy".
+ UnicodeString firstPart;
+ // Remaining portion of the split interval pattern,
+ // e.g. "MMM d, yyyy" for the interval pattern "MMM d - MMM d, yyyy".
+ UnicodeString secondPart;
+ /**
+ * Whether the first date in interval pattern is later date or not.
+ * Fallback format set the default ordering.
+ * And for a particular interval pattern, the order can be
+ * overridden by prefixing the interval pattern with "latestFirst:" or
+ * "earliestFirst:"
+ * For example, given 2 date, Jan 10, 2007 to Feb 10, 2007.
+ * if the fallback format is "{0} - {1}",
+ * and the pattern is "d MMM - d MMM yyyy", the interval format is
+ * "10 Jan - 10 Feb, 2007".
+ * If the pattern is "latestFirst:d MMM - d MMM yyyy",
+ * the interval format is "10 Feb - 10 Jan, 2007"
+ */
+ UBool laterDateFirst;
+ } PatternInfo;
+
+
+ /**
+ * default constructor
+ * @draft ICU 4.0
+ */
+ DateIntervalFormat();
+
+ /**
+ * Construct a DateIntervalFormat from DateFormat and a DateIntervalInfo.
+ *
+ * This is the convenient override of
+ * DateIntervalFormat(DateFormat, DateIntervalInfo, UnicodeString)
+ * with the UnicodeString value as null.
+ *
+ * @param dtfmt the SimpleDateFormat object to be adopted.
+ * @param dtItvInfo the DateIntervalInfo object to be adopted.
+ * @param status output param set to success/failure code on exit
+ * @draft ICU 4.0
+ */
+ DateIntervalFormat(DateFormat* dtfmt, DateIntervalInfo* dtItvInfo,
+ UErrorCode& status);
+
+
+ /**
+ * Construct a DateIntervalFormat from DateFormat,
+ * a DateIntervalInfo, and skeleton.
+ * DateFormat provides the timezone, calendar,
+ * full pattern, and date format symbols information.
+ * It should be a SimpleDateFormat object which
+ * has a pattern in it.
+ * the DateIntervalInfo provides the interval patterns.
+ *
+ * Note: the DateIntervalFormat takes ownership of both
+ * DateFormat and DateIntervalInfo objects.
+ * Caller should not delete them.
+ *
+ * @param dtfmt the SimpleDateFormat object to be adopted.
+ * @param dtItvInfo the DateIntervalInfo object to be adopted.
+ * @param skeleton the skeleton of the date formatter
+ * @param status output param set to success/failure code on exit
+ * @draft ICU 4.0
+ */
+ DateIntervalFormat(DateFormat* dtfmt, DateIntervalInfo* dtItvInfo,
+ const UnicodeString* skeleton, UErrorCode& status);
+
+
+
+ /**
+ * Construct a DateIntervalFormat from DateFormat
+ * and a DateIntervalInfo.
+ *
+ * It is a wrapper of the constructor.
+ *
+ * @param dtfmt the DateFormat object to be adopted.
+ * @param dtitvinf the DateIntervalInfo object to be adopted.
+ * @param status Output param set to success/failure code.
+ * @return a date time interval formatter which the caller owns.
+ * @draft ICU 4.0
+ */
+ static DateIntervalFormat* U_EXPORT2 create(DateFormat* dtfmt,
+ DateIntervalInfo* dtitvinf,
+ UErrorCode& status);
+
+
+
+ /**
+ * Construct a DateIntervalFormat from DateFormat
+ * and a DateIntervalInfo.
+ *
+ * It is a wrapper of the constructor.
+ *
+ * @param dtfmt the DateFormat object to be adopted.
+ * @param dtitvinf the DateIntervalInfo object to be adopted.
+ * @param skeleton the skeleton of this formatter.
+ * @param status Output param set to success/failure code.
+ * @return a date time interval formatter which the caller owns.
+ * @draft ICU 4.0
+ */
+ static DateIntervalFormat* U_EXPORT2 create(DateFormat* dtfmt,
+ DateIntervalInfo* dtitvinf,
+ const UnicodeString* skeleton,
+ UErrorCode& status);
+
+
+ /**
+ * Below are for generating interval patterns local to the formatter
+ */
+
+
+ /**
+ * Format 2 Calendars using fall-back interval pattern
+ *
+ * The full pattern used in this fall-back format is the
+ * full pattern of the date formatter.
+ *
+ * @param fromCalendar calendar set to the from date in date interval
+ * to be formatted into date interval string
+ * @param toCalendar calendar set to the to date in date interval
+ * to be formatted into date interval string
+ * @param appendTo Output parameter to receive result.
+ * Result is appended to existing contents.
+ * @param pos On input: an alignment field, if desired.
+ * On output: the offsets of the alignment field.
+ * @param status output param set to success/failure code on exit
+ * @return Reference to 'appendTo' parameter.
+ * @draft ICU 4.0
+ */
+ UnicodeString& fallbackFormat(Calendar& fromCalendar,
+ Calendar& toCalendar,
+ UnicodeString& appendTo,
+ FieldPosition& pos,
+ UErrorCode& status) const;
+
+
+
+ /**
+ * Initialize interval patterns locale to this formatter
+ *
+ * This code is a bit complicated since
+ * 1. the interval patterns saved in resource bundle files are interval
+ * patterns based on date or time only.
+ * It does not have interval patterns based on both date and time.
+ * Interval patterns on both date and time are algorithm generated.
+ *
+ * For example, it has interval patterns on skeleton "dMy" and "hm",
+ * but it does not have interval patterns on skeleton "dMyhm".
+ *
+ * The rules to generate interval patterns for both date and time skeleton are
+ * 1) when the year, month, or day differs, concatenate the two original
+ * expressions with a separator between,
+ * For example, interval pattern from "Jan 10, 2007 10:10 am"
+ * to "Jan 11, 2007 10:10am" is
+ * "Jan 10, 2007 10:10 am - Jan 11, 2007 10:10am"
+ *
+ * 2) otherwise, present the date followed by the range expression
+ * for the time.
+ * For example, interval pattern from "Jan 10, 2007 10:10 am"
+ * to "Jan 10, 2007 11:10am" is
+ * "Jan 10, 2007 10:10 am - 11:10am"
+ *
+ * 2. even if a pattern does not request a certain calendar field,
+ * the interval pattern needs to include such field if such fields are
+ * different between 2 dates.
+ * For example, a pattern/skeleton is "hm", but the interval pattern
+ * includes year, month, and date when year, month, and date differs.
+ *
+ *
+ * @param status output param set to success/failure code on exit
+ * @draft ICU 4.0
+ */
+ void initializePattern(UErrorCode& status);
+
+
+
+ /**
+ * Set fall back interval pattern given a calendar field,
+ * a skeleton, and a date time pattern generator.
+ * @param field the largest different calendar field
+ * @param skeleton a skeleton
+ * @param dtpng date time pattern generator
+ * @param status output param set to success/failure code on exit
+ * @draft ICU 4.0
+ */
+ void setFallbackPattern(UCalendarDateFields field,
+ const UnicodeString& skeleton,
+ DateTimePatternGenerator* dtpng,
+ UErrorCode& status);
+
+
+
+ /**
+ * get separated date and time skeleton from a combined skeleton.
+ *
+ * The difference between date skeleton and normalizedDateSkeleton are:
+ * 1. both 'y' and 'd' are appeared only once in normalizeDateSkeleton
+ * 2. 'E' and 'EE' are normalized into 'EEE'
+ * 3. 'MM' is normalized into 'M'
+ *
+ * The difference between time skeleton and normalizedTimeSkeleton are:
+ * 1. both 'H' and 'h' are normalized as 'h' in normalized time skeleton,
+ * 2. 'a' is omitted in normalized time skeleton.
+ * 3. there is only one appearance for 'h', 'm','v', 'z' in normalized time
+ * skeleton
+ *
+ *
+ * @param skeleton given combined skeleton.
+ * @param date Output parameter for date only skeleton.
+ * @param normalizedDate Output parameter for normalized date only
+ * skeleton.
+ * @param time Output parameter for time only skeleton.
+ * @param normalizedTime Output parameter for normalized time only
+ * skeleton.
+ *
+ * @draft ICU 4.0
+ */
+ static void U_EXPORT2 getDateTimeSkeleton(const UnicodeString& skeleton,
+ UnicodeString& date,
+ UnicodeString& normalizedDate,
+ UnicodeString& time,
+ UnicodeString& normalizedTime);
+
+
+
+ /**
+ * Generate date or time interval pattern from resource,
+ * and set them into the interval pattern locale to this formatter.
+ *
+ * It needs to handle the following:
+ * 1. need to adjust field width.
+ * For example, the interval patterns saved in DateIntervalInfo
+ * includes "dMMMy", but not "dMMMMy".
+ * Need to get interval patterns for dMMMMy from dMMMy.
+ * Another example, the interval patterns saved in DateIntervalInfo
+ * includes "hmv", but not "hmz".
+ * Need to get interval patterns for "hmz" from "hmv"
+ *
+ * 2. there might be no pattern for 'y' differ for skeleton "Md",
+ * in order to get interval patterns for 'y' differ,
+ * need to look for it from skeleton 'yMd'
+ *
+ * @param dateSkeleton normalized date skeleton
+ * @param timeSkeleton normalized time skeleton
+ * @return whether the resource is found for the skeleton.
+ * TRUE if interval pattern found for the skeleton,
+ * FALSE otherwise.
+ * @draft ICU 4.0
+ */
+ UBool setSeparateDateTimePtn(const UnicodeString& dateSkeleton,
+ const UnicodeString& timeSkeleton);
+
+
+
+
+ /**
+ * Generate interval pattern from existing resource
+ *
+ * It not only save the interval patterns,
+ * but also return the extended skeleton and its best match skeleton.
+ *
+ * @param field largest different calendar field
+ * @param skeleton skeleton
+ * @param bestSkeleton the best match skeleton which has interval pattern
+ * defined in resource
+ * @param differenceInfo the difference between skeleton and best skeleton
+ * 0 means the best matched skeleton is the same as input skeleton
+ * 1 means the fields are the same, but field width are different
+ * 2 means the only difference between fields are v/z,
+ * -1 means there are other fields difference
+ *
+ * @param extendedSkeleton extended skeleton
+ * @param extendedBestSkeleton extended best match skeleton
+ * @return whether the interval pattern is found
+ * through extending skeleton or not.
+ * TRUE if interval pattern is found by
+ * extending skeleton, FALSE otherwise.
+ * @draft ICU 4.0
+ */
+ UBool setIntervalPattern(UCalendarDateFields field,
+ const UnicodeString* skeleton,
+ const UnicodeString* bestSkeleton,
+ int8_t differenceInfo,
+ UnicodeString* extendedSkeleton = NULL,
+ UnicodeString* extendedBestSkeleton = NULL);
+
+ /**
+ * Adjust field width in best match interval pattern to match
+ * the field width in input skeleton.
+ *
+ * TODO (xji) make a general solution
+ * The adjusting rule can be:
+ * 1. always adjust
+ * 2. never adjust
+ * 3. default adjust, which means adjust according to the following rules
+ * 3.1 always adjust string, such as MMM and MMMM
+ * 3.2 never adjust between string and numeric, such as MM and MMM
+ * 3.3 always adjust year
+ * 3.4 do not adjust 'd', 'h', or 'm' if h presents
+ * 3.5 do not adjust 'M' if it is numeric(?)
+ *
+ * Since date interval format is well-formed format,
+ * date and time skeletons are normalized previously,
+ * till this stage, the adjust here is only "adjust strings", such as MMM
+ * and MMMM, EEE and EEEE.
+ *
+ * @param inputSkeleton the input skeleton
+ * @param bestMatchSkeleton the best match skeleton
+ * @param bestMatchIntervalPattern the best match interval pattern
+ * @param differenceInfo the difference between 2 skeletons
+ * 1 means only field width differs
+ * 2 means v/z exchange
+ * @param adjustedIntervalPattern adjusted interval pattern
+ * @draft ICU 4.0
+ */
+ static void U_EXPORT2 adjustFieldWidth(
+ const UnicodeString& inputSkeleton,
+ const UnicodeString& bestMatchSkeleton,
+ const UnicodeString& bestMatchIntervalPattern,
+ int8_t differenceInfo,
+ UnicodeString& adjustedIntervalPattern);
+
+ /**
+ * Concat a single date pattern with a time interval pattern,
+ * set it into the intervalPatterns, while field is time field.
+ * This is used to handle time interval patterns on skeleton with
+ * both time and date. Present the date followed by
+ * the range expression for the time.
+ * @param format date and time format
+ * @param formatLen format string length
+ * @param datePattern date pattern
+ * @param field time calendar field: AM_PM, HOUR, MINUTE
+ * @param status output param set to success/failure code on exit
+ * @draft ICU 4.0
+ */
+ void concatSingleDate2TimeInterval(const UChar* format,
+ int32_t formatLen,
+ const UnicodeString& datePattern,
+ UCalendarDateFields field,
+ UErrorCode& status);
+
+ /**
+ * check whether a calendar field present in a skeleton.
+ * @param field calendar field need to check
+ * @param skeleton given skeleton on which to check the calendar field
+ * @return true if field present in a skeleton.
+ * @draft ICU 4.0
+ */
+ static UBool U_EXPORT2 fieldExistsInSkeleton(UCalendarDateFields field,
+ const UnicodeString& skeleton);
+
+
+ /**
+ * Split interval patterns into 2 part.
+ * @param intervalPattern interval pattern
+ * @return the index in interval pattern which split the pattern into 2 part
+ * @draft ICU 4.0
+ */
+ static int32_t U_EXPORT2 splitPatternInto2Part(const UnicodeString& intervalPattern);
+
+
+ /**
+ * Break interval patterns as 2 part and save them into pattern info.
+ * @param field calendar field
+ * @param intervalPattern interval pattern
+ * @internal ICU 4.0
+ */
+ void setIntervalPattern(UCalendarDateFields field,
+ const UnicodeString& intervalPattern);
+
+
+ /**
+ * Break interval patterns as 2 part and save them into pattern info.
+ * @param field calendar field
+ * @param intervalPattern interval pattern
+ * @param laterDateFirst whether later date appear first in interval pattern
+ * @internal ICU 4.0
+ */
+ void setIntervalPattern(UCalendarDateFields field,
+ const UnicodeString& intervalPattern,
+ UBool laterDateFirst);
+
+
+ /**
+ * Set pattern information.
+ *
+ * @param field calendar field
+ * @param firstPart the first part in interval pattern
+ * @param secondPart the second part in interval pattern
+ * @param laterDateFirst whether the first date in intervalPattern
+ * is earlier date or later date
+ * @internal ICU 4.0
+ */
+ void setPatternInfo(UCalendarDateFields field,
+ const UnicodeString* firstPart,
+ const UnicodeString* secondpart,
+ UBool laterDateFirst);
+
+
+ // from calendar field to pattern letter
+ static const UChar fgCalendarFieldToPatternLetter[];
+
+
+ /**
+ * The interval patterns for this locale.
+ */
+ DateIntervalInfo* fInfo;
+
+ /**
+ * The DateFormat object used to format single pattern
+ */
+ SimpleDateFormat* fDateFormat;
+
+ /**
+ * The 2 calendars with the from and to date.
+ * could re-use the calendar in fDateFormat,
+ * but keeping 2 calendars make it clear and clean.
+ */
+ Calendar* fFromCalendar;
+ Calendar* fToCalendar;
+
+ /**
+ * Following are interval information relevant (local) to this formatter.
+ */
+ UnicodeString fSkeleton;
+ PatternInfo fIntervalPatterns[DateIntervalInfo::kIPI_MAX_INDEX];
+};
+
+
+
+
+
+
+/**
+ * Inequality test, defined as the negation of operator==().
+ * @param other the Format object to compare with.
+ * @return the logical negation of operator==(other).
+ */
+inline UBool
+DateIntervalFormat::operator!=(const Format& other) const {
+    const UBool isEqual = operator==(other);
+    return !isEqual;
+}
+
+/**
+ * Accessor for the interval pattern information held by this formatter.
+ * @return the stored fInfo pointer itself; no copy is made.
+ */
+inline const DateIntervalInfo*
+DateIntervalFormat::getDateIntervalInfo() const {
+    const DateIntervalInfo* const heldInfo = fInfo;
+    return heldInfo;
+}
+
+
+/**
+ * Replace the interval patterns with a copy of the given DateIntervalInfo
+ * and, when a date formatter is present, re-initialize the interval
+ * patterns of this formatter.
+ *
+ * The copy is made before the previously held object is released, so
+ * passing this formatter's own info (*getDateIntervalInfo()) is safe.
+ * If the copy cannot be allocated, status is set to
+ * U_MEMORY_ALLOCATION_ERROR and the formatter is left unchanged.
+ *
+ * @param newItvPattern the interval patterns to copy.
+ * @param status        output param set to success/failure code on exit
+ */
+inline void
+DateIntervalFormat::setDateIntervalInfo(const DateIntervalInfo& newItvPattern,
+                                        UErrorCode& status) {
+    // Copy first: newItvPattern may alias *fInfo, which must stay alive
+    // until the copy constructor has finished reading from it.
+    DateIntervalInfo* copiedInfo = new DateIntervalInfo(newItvPattern);
+    if ( copiedInfo == NULL ) {
+        // ICU objects do not throw on allocation failure; report it
+        // through status instead of continuing with a NULL fInfo.
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    delete fInfo;
+    fInfo = copiedInfo;
+    if ( fDateFormat ) {
+        initializePattern(status);
+    }
+}
+
+
+/**
+ * Take ownership of the given DateIntervalInfo, releasing the one held
+ * before, and re-initialize the interval patterns when a date formatter
+ * is present.
+ *
+ * @param newItvPattern the interval patterns object to adopt.
+ * @param status        output param passed on to initializePattern().
+ */
+inline void
+DateIntervalFormat::adoptDateIntervalInfo(DateIntervalInfo* newItvPattern,
+                                          UErrorCode& status) {
+    delete fInfo;
+    fInfo = newItvPattern;
+    if ( !fDateFormat ) {
+        // Without a date formatter the patterns are not rebuilt here.
+        return;
+    }
+    initializePattern(status);
+}
+
+
+/**
+ * Accessor for the date formatter held by this formatter.
+ * @return the stored fDateFormat pointer itself; no copy is made.
+ */
+inline const DateFormat*
+DateIntervalFormat::getDateFormat() const {
+    const DateFormat* const heldFormat = fDateFormat;
+    return heldFormat;
+}
+
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _DTITVFMT_H__
+//eof
Added: trunk/source/i18n/unicode/dtitvinf.h
===================================================================
--- trunk/source/i18n/unicode/dtitvinf.h (rev 0)
+++ trunk/source/i18n/unicode/dtitvinf.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -0,0 +1,520 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2008, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ *
+ * File DTITVINF.H
+ *
+ *******************************************************************************
+ */
+
+#ifndef __DTITVINF_H__
+#define __DTITVINF_H__
+
+/**
+ * \file
+ * \brief C++ API: Date/Time interval patterns for formatting date/time interval
+ */
+
+#include "unicode/utypes.h" /* brings in uconfig.h, which defines UCONFIG_NO_FORMATTING; must precede the test below */
+#if !UCONFIG_NO_FORMATTING
+
+#include "hash.h"
+#include "gregoimp.h"
+#include "uresimp.h"
+#include "unicode/udat.h"
+#include "unicode/locid.h"
+#include "unicode/ucal.h"
+#include "unicode/dtptngen.h"
+//#include "dtitv_impl.h"
+
+
+
+U_NAMESPACE_BEGIN
+
+/**
+ * @internal ICU 4.0
+ */
+static UBool U_CALLCONV hashTableValueComparator(UHashTok val1, UHashTok val2) ;
+
+
+/**
+ * DateIntervalInfo is a public class for encapsulating localizable
+ * date time interval patterns. It is used by DateIntervalFormat.
+ *
+ * <P>
+ * Logically, the interval patterns are mappings
+ * from (skeleton, the_largest_different_calendar_field)
+ * to (date_interval_pattern).
+ *
+ * <P>
+ * A skeleton
+ * <ol>
+ * <li>
+ * only keeps the field pattern letter and ignores all other parts
+ * in a pattern, such as space, punctuations, and string literals.
+ * <li>
+ * hides the order of fields.
+ * <li>
+ * might hide a field's pattern letter length.
+ *
+ * For those non-digit calendar fields, the pattern letter length is
+ * important, such as MMM, MMMM, and MMMMM; EEE and EEEE,
+ * and the field's pattern letter length is honored.
+ *
+ * For the digit calendar fields, such as M or MM, d or dd, yy or yyyy,
+ * the field pattern length is ignored and the best match, which is defined
+ * in date time patterns, will be returned without honoring the field pattern
+ * letter length in skeleton.
+ * </ol>
+ *
+ * <P>
+ * The calendar fields we support for interval formatting are:
+ * year, month, date, day-of-week, am-pm, hour, hour-of-day, and minute.
+ * Those calendar fields can be defined in the following order:
+ * year > month > date > am-pm > hour > minute
+ *
+ * The largest different calendar fields between 2 calendars is the
+ * first different calendar field in above order.
+ *
+ * For example: the largest different calendar fields between "Jan 10, 2007"
+ * and "Feb 20, 2008" is year.
+ *
+ * <P>
+ * There is a set of pre-defined static skeleton strings.
+ * There are pre-defined interval patterns for those pre-defined skeletons
+ * in locales' resource files.
+ * For example, for a skeleton UDAT_YEAR_ABBR_MONTH_DAY, which is "yMMMd",
+ * in en_US, if the largest different calendar field between date1 and date2
+ * is "year", the date interval pattern is "MMM d, yyyy - MMM d, yyyy",
+ * such as "Jan 10, 2007 - Jan 10, 2008".
+ * If the largest different calendar field between date1 and date2 is "month",
+ * the date interval pattern is "MMM d - MMM d, yyyy",
+ * such as "Jan 10 - Feb 10, 2007".
+ * If the largest different calendar field between date1 and date2 is "day",
+ * the date interval pattern is "MMM d-d, yyyy", such as "Jan 10-20, 2007".
+ *
+ * For date skeleton, the interval patterns when year, or month, or date is
+ * different are defined in resource files.
+ * For time skeleton, the interval patterns when am/pm, or hour, or minute is
+ * different are defined in resource files.
+ *
+ *
+ * <P>
+ * There are 2 dates in interval pattern. For most locales, the first date
+ * in an interval pattern is the earlier date. There might be a locale in which
+ * the first date in an interval pattern is the later date.
+ * We use fallback format for the default order for the locale.
+ * For example, if the fallback format is "{0} - {1}", it means
+ * the first date in the interval pattern for this locale is earlier date.
+ * If the fallback format is "{1} - {0}", it means the first date is the
+ * later date.
+ * For a particular interval pattern, the default order can be overridden
+ * by prefixing "latestFirst:" or "earliestFirst:" to the interval pattern.
+ * For example, if the fallback format is "{0}-{1}",
+ * but for skeleton "yMMMd", the interval pattern when day is different is
+ * "latestFirst:d-d MMM yy", it means by default, the first date in interval
+ * pattern is the earlier date. But for skeleton "yMMMd", when day is different,
+ * the first date in "d-d MMM yy" is the later date.
+ *
+ * <P>
+ * The recommended way to create a DateIntervalFormat object is to pass in
+ * the locale.
+ * By using a Locale parameter, the DateIntervalFormat object is
+ * initialized with the pre-defined interval patterns for a given or
+ * default locale.
+ * <P>
+ * Users can also create DateIntervalFormat object
+ * by supplying their own interval patterns.
+ * It provides flexibility for powerful usage.
+ *
+ * <P>
+ * After a DateIntervalInfo object is created, clients may modify
+ * the interval patterns using setIntervalPattern function as so desired.
+ * Currently, users can only set interval patterns when the following
+ * calendar fields are different: ERA, YEAR, MONTH, DATE, DAY_OF_MONTH,
+ * DAY_OF_WEEK, AM_PM, HOUR, HOUR_OF_DAY, and MINUTE.
+ * Interval patterns when other calendar fields are different is not supported.
+ * <P>
+ * DateIntervalInfo objects are clonable.
+ * When clients obtain a DateIntervalInfo object,
+ * they can feel free to modify it as necessary.
+ * <P>
+ * DateIntervalInfo are not expected to be subclassed.
+ * Data for a calendar is loaded out of resource bundles.
+ * To ICU 4.0, date interval patterns are only supported in Gregorian calendar.
+ * @draft ICU 4.0
+**/
+
+class U_I18N_API DateIntervalInfo : public UObject {
+public:
+ /**
+ * Default constructor.
+ * It does not initialize any interval patterns.
+ * It should be followed by setFallbackIntervalPattern() and
+ * setIntervalPattern(),
+ * and is recommended to be used only by power users who
+ * want to create their own interval patterns and use them to create
+ * date interval formatter.
+ * @param status output param set to success/failure code on exit
+ * @internal ICU 4.0
+ */
+ DateIntervalInfo(UErrorCode& status);
+
+
+ /**
+ * Construct DateIntervalInfo for the given locale,
+ * @param locale the interval patterns are loaded from the Gregorian
+ * calendar data in this locale.
+ * @param status output param set to success/failure code on exit
+ * @draft ICU 4.0
+ */
+ DateIntervalInfo(const Locale& locale, UErrorCode& status);
+
+
+ /**
+ * Copy constructor.
+ * @draft ICU 4.0
+ */
+ DateIntervalInfo(const DateIntervalInfo&);
+
+ /**
+ * Assignment operator
+ * @draft ICU 4.0
+ */
+ DateIntervalInfo& operator=(const DateIntervalInfo&);
+
+ /**
+ * Clone this object polymorphically.
+ * The caller owns the result and should delete it when done.
+ * @return a copy of the object
+ * @draft ICU 4.0
+ */
+ DateIntervalInfo* clone(void) const;
+
+ /**
+ * Destructor.
+ * It is virtual to be safe, but it is not designed to be subclassed.
+ * @draft ICU 4.0
+ */
+ virtual ~DateIntervalInfo();
+
+
+ /**
+ * Return true if another object is semantically equal to this one.
+ *
+ * @param other the DateIntervalInfo object to be compared with.
+ * @return true if other is semantically equal to this.
+ * @stable ICU 4.0
+ */
+ UBool operator==(const DateIntervalInfo& other) const;
+
+ /**
+ * Return true if another object is semantically unequal to this one.
+ *
+ * @param other the DateIntervalInfo object to be compared with.
+ * @return true if other is semantically unequal to this.
+ * @stable ICU 4.0
+ */
+ UBool operator!=(const DateIntervalInfo& other) const;
+
+
+
+ /**
+ * Provides a way for client to build interval patterns.
+ * User could construct DateIntervalInfo by providing
+ * a list of patterns.
+ * <P>
+ * For example:
+ * <pre>
+ * UErrorCode status = U_ZERO_ERROR;
+ * DateIntervalInfo* dIntervalInfo = new DateIntervalInfo(status);
+ * dIntervalInfo->setIntervalPattern("yMd", UCAL_YEAR, "'from' yyyy-M-d 'to' yyyy-M-d", status);
+ * dIntervalInfo->setIntervalPattern("yMMMd", UCAL_MONTH, "'from' yyyy MMM d 'to' MMM d", status);
+ * dIntervalInfo->setIntervalPattern("yMMMd", UCAL_DATE, "yyyy MMM d-d", status);
+ * dIntervalInfo->setFallbackIntervalPattern("{0} ~ {1}");
+ * </pre>
+ *
+ * Restriction:
+ * Currently, users can only set interval patterns when the following
+ * calendar fields are different: ERA, YEAR, MONTH, DATE, DAY_OF_MONTH,
+ * DAY_OF_WEEK, AM_PM, HOUR, HOUR_OF_DAY, and MINUTE.
+ * Interval patterns when other calendar fields are different are
+ * not supported.
+ *
+ * @param skeleton the skeleton on which interval pattern based
+ * @param lrgDiffCalUnit the largest different calendar unit.
+ * @param intervalPattern the interval pattern on the largest different
+ * calendar unit.
+ * For example, if lrgDiffCalUnit is
+ * "year", the interval pattern for en_US when year
+ * is different could be "'from' yyyy 'to' yyyy".
+ * @param status output param set to success/failure code on exit
+ * @draft ICU 4.0
+ */
+ void setIntervalPattern(const UnicodeString& skeleton,
+ UCalendarDateFields lrgDiffCalUnit,
+ const UnicodeString& intervalPattern,
+ UErrorCode& status);
+
+ /**
+ * Get the interval pattern given the largest different calendar field.
+ * @param skeleton the skeleton
+ * @param field the largest different calendar field
+ * @param status output param set to success/failure code on exit
+ * @return interval pattern
+ * @draft ICU 4.0
+ */
+ const UnicodeString* getIntervalPattern(const UnicodeString& skeleton,
+ UCalendarDateFields field,
+ UErrorCode& status) const;
+
+ /**
+ * Get the fallback interval pattern.
+ * @return fallback interval pattern
+ * @draft ICU 4.0
+ */
+ const UnicodeString& getFallbackIntervalPattern() const;
+
+
+ /**
+ * Set the fallback interval pattern.
+ * The fall-back interval pattern is obtained from the locale resource.
+ * If a user wants to set their own fall-back interval pattern,
+ * they can do so by calling the following method.
+ * For users who construct DateIntervalInfo() by default constructor,
+ * all interval patterns ( including fall-back ) are not set,
+ * those users need to call setIntervalPattern() to set their own
+ * interval patterns, and call setFallbackIntervalPattern() to set
+ * their own fall-back interval patterns. If a certain interval pattern
+ * ( for example, the interval pattern when 'year' is different ) is not
+ * found, fall-back pattern will be used.
+ * For those users who set all their patterns ( instead of calling
+ * non-default constructor to let the constructor get those patterns from
+ * locale ), if they do not set the fall-back interval pattern,
+ * it will fall back to '{date0} - {date1}'.
+ *
+ * @param fallbackPattern fall-back interval pattern.
+ * @draft ICU 4.0
+ */
+ void setFallbackIntervalPattern(const UnicodeString& fallbackPattern);
+
+
+ /** Get default order
+ * return default date ordering in interval pattern
+ * @draft ICU 4.0
+ */
+ UBool getDefaultOrder() const;
+
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 4.0
+ */
+ virtual UClassID getDynamicClassID() const;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 4.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+
+private:
+ /**
+ * DateIntervalFormat will need access to
+ * getBestSkeleton(), parseSkeleton(), enum IntervalPatternIndex,
+ * and calendarFieldToPatternIndex().
+ *
+ * Instead of making above public,
+ * make DateIntervalFormat a friend of DateIntervalInfo.
+ */
+ friend class DateIntervalFormat;
+
+ friend UBool U_CALLCONV hashTableValueComparator(UHashTok val1, UHashTok val2) ;
+
+ /**
+ * Following is for saving the interval patterns.
+ * We only support interval patterns on
+ * ERA, YEAR, MONTH, DAY, AM_PM, HOUR, and MINUTE
+ */
+ enum IntervalPatternIndex
+ {
+ kIPI_ERA,
+ kIPI_YEAR,
+ kIPI_MONTH,
+ kIPI_DATE,
+ kIPI_AM_PM,
+ kIPI_HOUR,
+ kIPI_MINUTE,
+ kIPI_MAX_INDEX
+ };
+
+ /**
+ * Initialize the DateIntervalInfo from locale
+ * @param locale the given locale.
+ * @param status output param set to success/failure code on exit
+ * @draft ICU 4.0
+ */
+ void initializeData(const Locale& locale, UErrorCode& status);
+
+
+ /* Set Interval pattern.
+ *
+ * It sets interval pattern into the hash map.
+ *
+ * @param skeleton skeleton on which the interval pattern based
+ * @param lrgDiffCalUnit the largest different calendar unit.
+ * @param intervalPattern the interval pattern on the largest different
+ * calendar unit.
+ * @param status output param set to success/failure code on exit
+ * @draft ICU 4.0
+ */
+ void setIntervalPatternInternally(const UnicodeString& skeleton,
+ UCalendarDateFields lrgDiffCalUnit,
+ const UnicodeString& intervalPattern,
+ UErrorCode& status);
+
+
+ /**given an input skeleton, get the best match skeleton
+ * which has pre-defined interval pattern in resource file.
+ * Also return the difference between the input skeleton
+ * and the best match skeleton.
+ *
+ * TODO (xji): set field weight or
+ * isolate the functionality in DateTimePatternGenerator
+ * @param skeleton input skeleton
+ * @param bestMatchDistanceInfo the difference between input skeleton
+ * and best match skeleton.
+ * 0, if there is exact match for input skeleton
+ * 1, if there is only field width difference between
+ * the best match and the input skeleton
+ * 2, the only field difference is 'v' and 'z'
+ * -1, if there is calendar field difference between
+ * the best match and the input skeleton
+ * @return best match skeleton
+ * @draft ICU 4.0
+ */
+ const UnicodeString* getBestSkeleton(const UnicodeString& skeleton,
+ int8_t& bestMatchDistanceInfo) const;
+
+
+ /**
+ * Parse skeleton, save each field's width.
+ * It is used for looking for best match skeleton,
+ * and adjust pattern field width.
+ * @param skeleton skeleton to be parsed
+ * @param skeletonFieldWidth parsed skeleton field width
+ * @draft ICU 4.0
+ */
+ static void U_EXPORT2 parseSkeleton(const UnicodeString& skeleton,
+ int32_t* skeletonFieldWidth);
+
+
+ /**
+ * Check whether one field width is numeric while the other is string.
+ *
+ * TODO (xji): make it general
+ *
+ * @param fieldWidth one field width
+ * @param anotherFieldWidth another field width
+ * @param patternLetter pattern letter char
+ * @return true if one field width is numeric and the other is string,
+ * false otherwise.
+ * @draft ICU 4.0
+ */
+ static UBool U_EXPORT2 stringNumeric(int32_t fieldWidth,
+ int32_t anotherFieldWidth,
+ char patternLetter);
+
+
+ /**
+ * Convert calendar field to the interval pattern index in
+ * hash table.
+ *
+ * Since we only support the following calendar fields:
+ * ERA, YEAR, MONTH, DATE, DAY_OF_MONTH, DAY_OF_WEEK,
+ * AM_PM, HOUR, HOUR_OF_DAY, and MINUTE,
+ * We reserve only kIPI_MAX_INDEX interval patterns for a skeleton.
+ *
+ * @param field calendar field
+ * @param status output param set to success/failure code on exit
+ * @return interval pattern index in hash table
+ * @draft ICU 4.0
+ */
+ static IntervalPatternIndex U_EXPORT2 calendarFieldToIntervalIndex(
+ UCalendarDateFields field,
+ UErrorCode& status);
+
+
+ /**
+ * delete hash table (of type fIntervalPatterns).
+ *
+ * @param hTable hash table to be deleted
+ * @draft ICU 4.0
+ */
+ void deleteHash(Hashtable* hTable);
+
+
+ /**
+ * initialize hash table (of type fIntervalPatterns).
+ *
+ * @param status output param set to success/failure code on exit
+ * @return hash table initialized
+ * @draft ICU 4.0
+ */
+ Hashtable* initHash(UErrorCode& status);
+
+
+
+ /**
+ * copy hash table (of type fIntervalPatterns).
+ *
+ * @param source the source to copy from
+ * @param target the target to copy to
+ * @param status output param set to success/failure code on exit
+ * @draft ICU 4.0
+ */
+ void copyHash(const Hashtable* source, Hashtable* target, UErrorCode& status);
+
+
+ // data members
+ // fallback interval pattern
+ UnicodeString fFallbackIntervalPattern;
+ // default order
+ UBool fFirstDateInPtnIsLaterDate;
+
+ // HashMap<UnicodeString, UnicodeString[kIPI_MAX_INDEX]>
+ // HashMap( skeleton, pattern[largest_different_field] )
+ Hashtable* fIntervalPatterns;
+
+};// end class DateIntervalInfo
+// Inequality is defined purely as the negation of operator==(const DateIntervalInfo&).
+inline UBool
+DateIntervalInfo::operator!=(const DateIntervalInfo& other) const {
+ const UBool sameAsOther = operator==(other);
+ return !sameAsOther;
+}
+
+// Returns fFirstDateInPtnIsLaterDate; going by the member name, TRUE presumably means the later date comes first -- confirm.
+inline UBool
+DateIntervalInfo::getDefaultOrder() const {
+ return fFirstDateInPtnIsLaterDate;
+}
+
+// Returns a reference to the stored fallback pattern (fFallbackIntervalPattern); no copy is made.
+inline const UnicodeString&
+DateIntervalInfo::getFallbackIntervalPattern() const {
+ return fFallbackIntervalPattern;
+}
+
+
+U_NAMESPACE_END
+
+#endif
+
+#endif
+
Modified: trunk/source/i18n/unicode/dtptngen.h
===================================================================
--- trunk/source/i18n/unicode/dtptngen.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/unicode/dtptngen.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -19,6 +19,12 @@
U_NAMESPACE_BEGIN
+/**
+ * \file
+ * \brief C++ API: Date/Time Pattern Generator
+ */
+
+
class Hashtable;
class FormatParser;
class DateTimeMatcher;
Modified: trunk/source/i18n/unicode/msgfmt.h
===================================================================
--- trunk/source/i18n/unicode/msgfmt.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/unicode/msgfmt.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007, International Business Machines Corporation and others. All Rights Reserved.
+* Copyright (C) 2007-2008, International Business Machines Corporation and others. All Rights Reserved.
********************************************************************************
*
* File MSGFMT.H
@@ -541,7 +541,7 @@
* the item will be ignored.
* The caller should not delete the Format object after this call.
* @param formatName Name of the subformat.
- * @param format Format to be adopted.
+ * @param formatToAdopt Format to be adopted.
* @param status output param set to success/failure code.
* @draft ICU 4.0
*/
Modified: trunk/source/i18n/unicode/rbtz.h
===================================================================
--- trunk/source/i18n/unicode/rbtz.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/unicode/rbtz.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -25,6 +25,12 @@
class UVector;
struct Transition;
+/**
+ * a BasicTimeZone subclass implemented in terms of InitialTimeZoneRule and TimeZoneRule instances
+ * @see BasicTimeZone
+ * @see InitialTimeZoneRule
+ * @see TimeZoneRule
+ */
class U_I18N_API RuleBasedTimeZone : public BasicTimeZone {
public:
/**
Modified: trunk/source/i18n/unicode/smpdtfmt.h
===================================================================
--- trunk/source/i18n/unicode/smpdtfmt.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/unicode/smpdtfmt.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -606,6 +606,43 @@
*/
virtual void adoptCalendar(Calendar* calendarToAdopt);
+ /**
+ * Check whether the 'field' is smaller than all the fields covered in
+ * pattern, return TRUE if it is. The sequence of calendar field,
+ * from large to small is: ERA, YEAR, MONTH, DATE, AM_PM, HOUR, MINUTE,...
+ * @param field the calendar field need to check against
+ * @return TRUE if the 'field' is smaller than all the fields
+ * covered in pattern. FALSE otherwise.
+ * @internal ICU 4.0
+ */
+ UBool isFieldUnitIgnored(UCalendarDateFields field) const;
+
+
+ /**
+ * Check whether the 'field' is smaller than all the fields covered in
+ * pattern, return TRUE if it is. The sequence of calendar field,
+ * from large to small is: ERA, YEAR, MONTH, DATE, AM_PM, HOUR, MINUTE,...
+ * @param pattern the pattern to check against
+ * @param field the calendar field need to check against
+ * @return TRUE if the 'field' is smaller than all the fields
+ * covered in pattern. FALSE otherwise.
+ * @internal ICU 4.0
+ */
+ static UBool isFieldUnitIgnored(const UnicodeString& pattern,
+ UCalendarDateFields field);
+
+
+
+ /**
+ * Get the locale of this simple date formatter.
+ * It is used in DateIntervalFormat.
+ *
+ * @return locale in this simple date formatter
+ * @internal ICU 4.0
+ */
+ const Locale& getSmpFmtLocale(void) const;
+
+
private:
friend class DateFormat;
@@ -827,6 +864,15 @@
static const UDateFormatField fgPatternIndexToDateFormatField[];
/**
+ * Used to map Calendar field to field level.
+ * The larger the level, the smaller the field unit.
+ * For example, UCAL_ERA level is 0, UCAL_YEAR level is 10,
+ * UCAL_MONTH level is 20.
+ */
+ static const int32_t fgCalendarFieldToLevel[];
+ static const int32_t fgPatternCharToLevel[];
+
+ /**
* The formatting pattern for this formatter.
*/
UnicodeString fPattern;
Modified: trunk/source/i18n/unicode/tblcoll.h
===================================================================
--- trunk/source/i18n/unicode/tblcoll.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/unicode/tblcoll.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,11 +1,16 @@
/*
******************************************************************************
-* Copyright (C) 1996-2007, International Business Machines Corporation and
+* Copyright (C) 1996-2008, International Business Machines Corporation and
* others. All Rights Reserved.
******************************************************************************
*/
/**
+ * \file
+ * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
+ */
+
+/**
* File tblcoll.h
*
* Created by: Helena Shih
@@ -56,10 +61,6 @@
#include "unicode/utypes.h"
-/**
- * \file
- * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
- */
#if !UCONFIG_NO_COLLATION
Modified: trunk/source/i18n/unicode/ucal.h
===================================================================
--- trunk/source/i18n/unicode/ucal.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/unicode/ucal.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -623,6 +623,18 @@
ucal_close(UCalendar *cal);
/**
+ * Open a copy of a UCalendar.
+ * This function performs a deep copy.
+ * @param cal The calendar to copy
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return A pointer to a UCalendar identical to cal.
+ * @draft ICU 4.0
+ */
+U_DRAFT UCalendar* U_EXPORT2
+ucal_clone(const UCalendar* cal,
+ UErrorCode* status);
+
+/**
* Set the TimeZone used by a UCalendar.
* A UCalendar uses a timezone for converting from Greenwich time to local time.
* @param cal The UCalendar to set.
Modified: trunk/source/i18n/unicode/ucoleitr.h
===================================================================
--- trunk/source/i18n/unicode/ucoleitr.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/unicode/ucoleitr.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
/*
*******************************************************************************
-* Copyright (C) 2001-2004, International Business Machines
+* Copyright (C) 2001-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*
@@ -27,6 +27,14 @@
*/
#define UCOL_NULLORDER ((int32_t)0xFFFFFFFF)
+/**
+ * This indicates an error has occurred during processing or there are no more CEs
+ * to be returned.
+ *
+ * @internal
+ */
+#define UCOL_PROCESSED_NULLORDER ((int64_t)U_INT64_MAX)
+
#include "unicode/ucol.h"
/**
@@ -176,6 +184,45 @@
ucol_previous(UCollationElements *elems, UErrorCode *status);
/**
+ * Get the processed ordering priority of the next collation element in the text.
+ * A single character may contain more than one collation element.
+ *
+ * @param elems The UCollationElements containing the text.
+ * @param ixLow a pointer to an int32_t to receive the iterator index before fetching the CE.
+ * @param ixHigh a pointer to an int32_t to receive the iterator index after fetching the CE.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return The next collation elements ordering, otherwise returns UCOL_PROCESSED_NULLORDER
+ * if an error has occurred or if the end of string has been reached
+ *
+ * @internal
+ */
+U_INTERNAL int64_t U_EXPORT2
+ucol_nextProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
+
+/**
+ * Get the processed ordering priority of the previous collation element in the text.
+ * A single character may contain more than one collation element.
+ * Note that internally a stack is used to store buffered collation elements.
+ * It is very rare that the stack will overflow, however if such a case is
+ * encountered, the problem can be solved by increasing the size
+ * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h.
+ *
+ * @param elems The UCollationElements containing the text.
+ * @param ixLow A pointer to an int32_t to receive the iterator index after fetching the CE
+ * @param ixHigh A pointer to an int32_t to receive the iterator index before fetching the CE
+ * @param status A pointer to an UErrorCode to receive any errors. Notably
+ * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack
+ * buffer has been exhausted.
+ * @return The previous collation elements ordering, otherwise returns
+ * UCOL_PROCESSED_NULLORDER if an error has occurred or if the start of
+ * string has been reached.
+ *
+ * @internal
+ */
+U_INTERNAL int64_t U_EXPORT2
+ucol_previousProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
+
+/**
* Get the maximum length of any expansion sequences that end with the
* specified comparison order.
* This is useful for .... ?
Modified: trunk/source/i18n/unicode/ucurr.h
===================================================================
--- trunk/source/i18n/unicode/ucurr.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/unicode/ucurr.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -219,30 +219,52 @@
U_STABLE UEnumeration * U_EXPORT2
ucurr_openISOCurrencies(uint32_t currType, UErrorCode *pErrorCode);
-/**
- * Finds a currency code for the given locale and date
- * @param locale the locale for which to retrieve a currency code.
- * Currency can be specified by the "currency" keyword
- * in which case it overrides the default currency code
- * @param date the date for which to retrieve a currency code for
- the given locale.
- * @param buff fill in buffer. Can be NULL for preflighting.
- * @param buffCapacity capacity of the fill in buffer. Can be 0 for
- * preflighting. If it is non-zero, the buff parameter
- * must not be NULL.
- * @param ec error code
- * @return length of the currency string. It should always be 3. If 0,
- * currency couldn't be found or the input values are
- * invalid.
- * @stable ICU 4.0
+/**
+ * Finds the number of valid currency codes for the
+ * given locale and date.
+ * @param locale the locale for which to retrieve the
+ * currency count.
+ * @param date the date for which to retrieve the
+ * currency count for the given locale.
+ * @param ec error code
+ * @return the number of currency codes for the
+ * given locale and date. If 0, currency
+ * codes couldn't be found or the input
+ * values are invalid.
+ * @draft ICU 4.0
*/
-U_STABLE int32_t U_EXPORT2
-ucurr_forLocaleAndDate(const char* locale,
- UDate date,
- UChar* buff,
- int32_t buffCapacity,
- UErrorCode* ec);
+U_DRAFT int32_t U_EXPORT2
+ucurr_countCurrencies(const char* locale,
+ UDate date,
+ UErrorCode* ec);
+/**
+ * Finds a currency code for the given locale and date
+ * @param locale the locale for which to retrieve a currency code.
+ * Currency can be specified by the "currency" keyword
+ * in which case it overrides the default currency code
+ * @param date the date for which to retrieve a currency code for
+ * the given locale.
+ * @param index the index within the available list of currency codes
+ * for the given locale on the given date.
+ * @param buff fill in buffer. Can be NULL for preflighting.
+ * @param buffCapacity capacity of the fill in buffer. Can be 0 for
+ * preflighting. If it is non-zero, the buff parameter
+ * must not be NULL.
+ * @param ec error code
+ * @return length of the currency string. It should always be 3.
+ * If 0, currency couldn't be found or the input values are
+ * invalid.
+ * @draft ICU 4.0
+ */
+U_DRAFT int32_t U_EXPORT2
+ucurr_forLocaleAndDate(const char* locale,
+ UDate date,
+ int32_t index,
+ UChar* buff,
+ int32_t buffCapacity,
+ UErrorCode* ec);
+
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif
Modified: trunk/source/i18n/unicode/udat.h
===================================================================
--- trunk/source/i18n/unicode/udat.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/unicode/udat.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -171,7 +171,103 @@
} UDateFormatStyle;
+
/**
+ * Below are a set of pre-defined skeletons.
+ * They have pre-defined interval patterns in resource files.
+ * Users are encouraged to use them in date interval format factory methods.
+ *
+ * <P>
+ * We choose to use predefined skeleton string instead of skeleton enum because
+ * we need to keep consistency between DateFormat and DateIntervalFormat
+ * factory methods.
+ * It is not good to introduce another set of enum for skeleton while having
+ * UDateFormatStyle for full pattern.
+ * And it is not good to mix the set of enum for skeleton into UDateFormatStyle.
+ * So, a new set of pre-defined skeleton is introduced below.
+ * <P>
+ *
+ * A skeleton
+ * <ul>
+ * <li>
+ * 1. only keeps the field pattern letter and ignores all other parts
+ * in a pattern, such as space, punctuations, and string literals.
+ * <li>
+ * 2. hides the order of fields.
+ * <li>
+ * 3. might hide a field's pattern letter length.
+ *
+ * For those non-digit calendar fields, the pattern letter length is
+ * important, such as MMM, MMMM, and MMMMM; EEE and EEEE,
+ * and the field's pattern letter length is honored.
+ *
+ * For the digit calendar fields, such as M or MM, d or dd, yy or yyyy,
+ * the field pattern length is ignored and the best match, which is defined
+ * in date time patterns, will be returned without honor the field pattern
+ * letter length in skeleton.
+ * </ul>
+ *
+ * <P>
+ * For example, given skeleton YEAR_MONTH_DAY_SHORT_FORMAT, which is "yMd",
+ * for English, the full pattern is "M/d/yy", which is the short format
+ * of date pattern having DAY, MONTH, and YEAR.
+ *
+ * <P>
+ * The skeletons defined below consist of the desired calendar field set
+ * (for example, DAY, MONTH, YEAR) and the format length (long, medium, short)
+ * used in date time patterns.
+ *
+ * For example, skeleton YEAR_MONTH_MEDIUM_FORMAT consists of month and year,
+ * and its corresponding full pattern is the medium format date pattern.
+ * So, the skeleton is "yMMM", for English, the full pattern is "MMM yyyy",
+ * which is the format obtained by removing DATE from the medium date format.
+ *
+ * For example, skeleton YEAR_MONTH_DOW_DAY_MEDIUM_FORMAT consists of day, month,
+ * year, and day-of-week, and its corresponding full pattern is the medium
+ * format date pattern. So, the skeleton is "yMMMEEEd", for English,
+ * the full pattern is "EEE, MMM d, yyyy", which is the medium date format
+ * plus day-of-week.
+ *
+ * @draft ICU 4.0
+ */
+
+#define UDAT_MINUTE_SECOND "ms"
+#define UDAT_HOUR24_MINUTE "Hm"
+#define UDAT_HOUR24_MINUTE_SECOND "Hms"
+#define UDAT_HOUR_MINUTE_SECOND "hms"
+#define UDAT_STANDALONE_MONTH "LLLL"
+#define UDAT_ABBR_STANDALONE_MONTH "LLL"
+#define UDAT_YEAR_QUARTER "yQQQ"
+#define UDAT_YEAR_ABBR_QUARTER "yQ"
+#define UDAT_HOUR_MINUTE "hm"
+#define UDAT_YEAR "y"
+#define UDAT_DAY "d"
+#define UDAT_NUM_MONTH_WEEKDAY_DAY "MEd"
+#define UDAT_YEAR_NUM_MONTH "yM"
+#define UDAT_NUM_MONTH_DAY "Md"
+#define UDAT_YEAR_NUM_MONTH_WEEKDAY_DAY "yMEd"
+#define UDAT_ABBR_MONTH_WEEKDAY_DAY "MMMEd"
+#define UDAT_YEAR_MONTH "yMMMM"
+#define UDAT_YEAR_ABBR_MONTH "yMMM"
+#define UDAT_MONTH_DAY "MMMMd"
+#define UDAT_ABBR_MONTH_DAY "MMMd"
+#define UDAT_MONTH_WEEKDAY_DAY "MMMMEEEEd"
+#define UDAT_YEAR_ABBR_MONTH_WEEKDAY_DAY "yMMMEd"
+#define UDAT_YEAR_MONTH_WEEKDAY_DAY "yMMMMEEEEd"
+#define UDAT_YEAR_MONTH_DAY "yMMMMd"
+#define UDAT_YEAR_ABBR_MONTH_DAY "yMMMd"
+#define UDAT_YEAR_NUM_MONTH_DAY "yMd"
+#define UDAT_NUM_MONTH "M"
+#define UDAT_ABBR_MONTH "MMM"
+#define UDAT_MONTH "MMMM"
+#define UDAT_HOUR_MINUTE_GENERIC_TZ "hmv"
+#define UDAT_HOUR_MINUTE_TZ "hmz"
+#define UDAT_HOUR "h"
+#define UDAT_HOUR_GENERIC_TZ "hv"
+#define UDAT_HOUR_TZ "hz"
+
+
+/**
* FieldPosition and UFieldPosition selectors for format fields
* defined by DateFormat and UDateFormat.
* @stable ICU 3.0
Modified: trunk/source/i18n/unicode/usearch.h
===================================================================
--- trunk/source/i18n/unicode/usearch.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/unicode/usearch.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (C) 2001-2007 IBM and others. All rights reserved.
+* Copyright (C) 2001-2008 IBM and others. All rights reserved.
**********************************************************************
* Date Name Description
* 06/28/2001 synwee Creation.
@@ -641,6 +641,126 @@
*/
U_STABLE void U_EXPORT2 usearch_reset(UStringSearch *strsrch);
+/**
+ * Simple forward search for the pattern, starting at a specified index,
+ * and using a default set of search options.
+ *
+ * This is an experimental function, and is not an official part of the
+ * ICU API.
+ *
+ * The collator options, such as UCOL_STRENGTH and UCOL_NORMALIZATION_MODE, are honored.
+ *
+ * The UStringSearch options USEARCH_CANONICAL_MATCH, USEARCH_OVERLAP and
+ * any Break Iterator are ignored.
+ *
+ * Matches obey the following constraints:
+ *
+ * Characters at the start or end positions of a match that are ignorable
+ * for collation are not included as part of the match, unless they
+ * are part of a combining sequence, as described below.
+ *
+ * A match will not include a partial combining sequence. Combining
+ * character sequences are considered to be inseparable units,
+ * and either match the pattern completely, or are considered to not match
+ * at all. Thus, for example, an A followed by a combining accent mark will
+ * not be found when searching for a plain (unaccented) A (unless
+ * the collation strength has been set to ignore all accents).
+ *
+ * When beginning a search, the initial starting position, startIdx,
+ * is assumed to be an acceptable match boundary with respect to
+ * combining characters. A combining sequence that spans across the
+ * starting point will not suppress a match beginning at startIdx.
+ *
+ * Characters that expand to multiple collation elements
+ * (German sharp-S becoming 'ss', or the composed forms of accented
+ * characters, for example) also must match completely.
+ * Searching for a single 's' in a string containing only a sharp-s will
+ * find no match.
+ *
+ *
+ * @param strsrch the UStringSearch struct, which references both
+ * the text to be searched and the pattern being sought.
+ * @param startIdx The index into the text to begin the search.
+ * @param matchStart An out parameter, the starting index of the matched text.
+ * This parameter may be NULL.
+ * A value of -1 will be returned if no match was found.
+ * @param matchLimit Out parameter, the index of the first position following the matched text.
+ * The matchLimit will be at a suitable position for beginning a subsequent search
+ * in the input text.
+ * This parameter may be NULL.
+ * A value of -1 will be returned if no match was found.
+ *
+ * @param status Report any errors. Note that no match found is not an error.
+ * @return TRUE if a match was found, FALSE otherwise.
+ *
+ * @internal
+ */
+U_INTERNAL UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
+ int32_t startIdx,
+ int32_t *matchStart,
+ int32_t *matchLimit,
+ UErrorCode *status);
+
+/**
+ * Simple backwards search for the pattern, starting at a specified index,
+ * and using a default set of search options.
+ *
+ * This is an experimental function, and is not an official part of the
+ * ICU API.
+ *
+ * The collator options, such as UCOL_STRENGTH and UCOL_NORMALIZATION_MODE, are honored.
+ *
+ * The UStringSearch options USEARCH_CANONICAL_MATCH, USEARCH_OVERLAP and
+ * any Break Iterator are ignored.
+ *
+ * Matches obey the following constraints:
+ *
+ * Characters at the start or end positions of a match that are ignorable
+ * for collation are not included as part of the match, unless they
+ * are part of a combining sequence, as described below.
+ *
+ * A match will not include a partial combining sequence. Combining
+ * character sequences are considered to be inseparable units,
+ * and either match the pattern completely, or are considered to not match
+ * at all. Thus, for example, an A followed by a combining accent mark will
+ * not be found when searching for a plain (unaccented) A (unless
+ * the collation strength has been set to ignore all accents).
+ *
+ * When beginning a search, the initial starting position, startIdx,
+ * is assumed to be an acceptable match boundary with respect to
+ * combining characters. A combining sequence that spans across the
+ * starting point will not suppress a match beginning at startIdx.
+ *
+ * Characters that expand to multiple collation elements
+ * (German sharp-S becoming 'ss', or the composed forms of accented
+ * characters, for example) also must match completely.
+ * Searching for a single 's' in a string containing only a sharp-s will
+ * find no match.
+ *
+ *
+ * @param strsrch the UStringSearch struct, which references both
+ * the text to be searched and the pattern being sought.
+ * @param startIdx The index into the text to begin the search.
+ * @param matchStart An out parameter, the starting index of the matched text.
+ * This parameter may be NULL.
+ * A value of -1 will be returned if no match was found.
+ * @param matchLimit Out parameter, the index of the first position following the matched text.
+ * The matchLimit will be at a suitable position for beginning a subsequent search
+ * in the input text.
+ * This parameter may be NULL.
+ * A value of -1 will be returned if no match was found.
+ *
+ * @param status Report any errors. Note that no match found is not an error.
+ * @return TRUE if a match was found, FALSE otherwise.
+ *
+ * @internal
+ */
+U_INTERNAL UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
+ int32_t startIdx,
+ int32_t *matchStart,
+ int32_t *matchLimit,
+ UErrorCode *status);
+
#endif /* #if !UCONFIG_NO_COLLATION */
#endif
Modified: trunk/source/i18n/usearch.cpp
===================================================================
--- trunk/source/i18n/usearch.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/usearch.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -19,7 +19,11 @@
#include "usrchimp.h"
#include "cmemory.h"
#include "ucln_in.h"
+#include "uassert.h"
+// don't use Boyer-Moore
+#define BOYER_MOORE 0
+
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
// internal definition ---------------------------------------------------
@@ -49,6 +53,10 @@
ci->flags = ci->origFlags;
}
ci->fcdPosition = NULL;
+
+ ci->offsetReturn = NULL;
+ ci->offsetStore = ci->offsetBuffer;
+ ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
}
/**
@@ -235,6 +243,50 @@
}
/**
+* Adds a uint64_t value to a destination array.
+* Creates a new array if we run out of space. The caller will have to
+* manually deallocate the newly allocated array.
+* Internal method, status assumed to be success, caller has to check status
+* before calling this method. destination not to be NULL and has at least
+* size destinationlength.
+* @param destination target array
+* @param offset destination offset to add value
+* @param destinationlength target array size, return value for the new size
+* @param value to be added
+* @param increments incremental size expected
+* @param status output error if any, caller to check status before calling
+* method, status assumed to be success when passed in.
+* @return new destination array, destination if there was no new allocation
+*/
+static
+inline int64_t * addTouint64_tArray(int64_t *destination,
+ uint32_t offset,
+ uint32_t *destinationlength,
+ uint64_t value,
+ uint32_t increments,
+ UErrorCode *status)
+{
+ uint32_t newlength = *destinationlength;
+ if (offset + 1 == newlength) {
+ newlength += increments;
+ int64_t *temp = (int64_t *)allocateMemory(
+ sizeof(int64_t) * newlength, status);
+
+ if (U_FAILURE(*status)) {
+ return NULL;
+ }
+
+ uprv_memcpy(temp, destination, sizeof(int64_t) * offset);
+ *destinationlength = newlength;
+ destination = temp;
+ }
+
+ destination[offset] = value;
+
+ return destination;
+}
+
+/**
* Initializing the ce table for a pattern.
* Stores non-ignorable collation keys.
* Table size will be estimated by the size of the pattern text. Table
@@ -306,6 +358,82 @@
}
/**
+* Initializing the pce table for a pattern.
+* Stores non-ignorable collation keys.
+* Table size will be estimated by the size of the pattern text. Table
+* expansion will be performed as we go along. Adding 1 to ensure that the table
+* size definitely increases.
+* Internal method, status assumed to be a success.
+* @param strsrch string search data
+* @param status output error if any, caller to check status before calling
+* method, status assumed to be success when passed in.
+* @return currently always 0; the total number of expansions is not computed here
+*/
+static
+inline uint16_t initializePatternPCETable(UStringSearch *strsrch,
+ UErrorCode *status)
+{
+ UPattern *pattern = &(strsrch->pattern);
+ uint32_t pcetablesize = INITIAL_ARRAY_SIZE_;
+ int64_t *pcetable = pattern->PCEBuffer;
+ uint32_t patternlength = pattern->textLength;
+ UCollationElements *coleiter = strsrch->utilIter;
+
+ if (coleiter == NULL) {
+ coleiter = ucol_openElements(strsrch->collator, pattern->text,
+ patternlength, status);
+ // status will be checked in ucol_next(..) later and if it is an
+ // error UCOL_NULLORDER the result of ucol_next(..) and 0 will be
+ // returned.
+ strsrch->utilIter = coleiter;
+ } else {
+ uprv_init_collIterate(strsrch->collator, pattern->text,
+ pattern->textLength,
+ &coleiter->iteratordata_);
+ }
+
+ if (pattern->PCE != pcetable && pattern->PCE != NULL) {
+ uprv_free(pattern->PCE);
+ }
+
+ uint16_t offset = 0;
+ uint16_t result = 0;
+ int64_t pce;
+
+ uprv_init_pce(coleiter);
+
+ // ** Should processed CEs be signed or unsigned?
+ // ** (the rest of the code in this file seems to play fast-and-loose with
+ // ** whether a CE is signed or unsigned. For example, look at routine above this one.)
+ while ((pce = ucol_nextProcessed(coleiter, NULL, NULL, status)) != UCOL_PROCESSED_NULLORDER &&
+ U_SUCCESS(*status)) {
+ int64_t *temp = addTouint64_tArray(pcetable, offset, &pcetablesize,
+ pce,
+ patternlength - ucol_getOffset(coleiter) + 1,
+ status);
+
+ if (U_FAILURE(*status)) {
+ return 0;
+ }
+
+ offset += 1;
+
+ if (pcetable != temp && pcetable != pattern->PCEBuffer) {
+ uprv_free(pcetable);
+ }
+
+ pcetable = temp;
+ //result += (uint16_t)(ucol_getMaxExpansion(coleiter, ce) - 1);
+ }
+
+ pcetable[offset] = 0;
+ pattern->PCE = pcetable;
+ pattern->PCELength = offset;
+
+ return result;
+}
+
+/**
* Initializes the pattern struct.
* Internal method, status assumed to be success.
* @param strsrch UStringSearch data storage
@@ -333,6 +461,16 @@
pattern->hasSuffixAccents = getFCD(patterntext, &index, length) &
LAST_BYTE_MASK_;
}
+
+ // ** HACK **
+ if (strsrch->pattern.PCE != NULL) {
+ if (strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) {
+ uprv_free(strsrch->pattern.PCE);
+ }
+
+ strsrch->pattern.PCE = NULL;
+ }
+
// since intializePattern is an internal method status is a success.
return initializePatternCETable(strsrch, status);
}
@@ -431,6 +569,7 @@
strsrch->pattern.defaultShiftSize = 0;
}
+#if BOYER_MOORE
/**
* Check to make sure that the match length is at the end of the character by
* using the breakiterator.
@@ -626,6 +765,7 @@
// * next character is a accent: shift to the next base character
return textoffset;
}
+#endif // #if BOYER_MOORE
/**
* sets match not found
@@ -645,6 +785,7 @@
}
}
+#if BOYER_MOORE
/**
* Gets the offset to the next safe point in text.
* ie. not the middle of a contraction, swappable characters or supplementary
@@ -804,7 +945,7 @@
ignorable = FALSE;
}
ce = getCE(strsrch, ucol_next(coleiter, &status));
- if (U_FAILURE(status)) {
+ if (U_FAILURE(status) || ce == UCOL_NULLORDER) {
return TRUE;
}
}
@@ -877,9 +1018,10 @@
int32_t firstce = strsrch->pattern.CE[0];
UCollationElements *coleiter = strsrch->textIter;
UErrorCode status = U_ZERO_ERROR;
+ int32_t ce;
setColEIterOffset(coleiter, start);
- while (getCE(strsrch, ucol_next(coleiter, &status)) != firstce) {
- if (U_FAILURE(status)) {
+ while ((ce = getCE(strsrch, ucol_next(coleiter, &status))) != firstce) {
+ if (U_FAILURE(status) || ce == UCOL_NULLORDER) {
return TRUE;
}
}
@@ -895,7 +1037,8 @@
}
count ++;
}
- int32_t ce = ucol_next(coleiter, &status);
+
+ ce = ucol_next(coleiter, &status);
if (U_FAILURE(status)) {
return TRUE;
}
@@ -914,6 +1057,7 @@
}
return FALSE;
}
+#endif // #if BOYER_MOORE
/**
* Checks if the offset runs out of the text string
@@ -927,6 +1071,7 @@
return offset < 0 || offset > textlength;
}
+#if BOYER_MOORE
/**
* Checks for identical match
* @param strsrch string search data
@@ -1034,7 +1179,7 @@
{
int32_t result = ucol_getOffset(coleiter);
// intricacies of the the backwards collation element iterator
- if (!forwards && inNormBuf(coleiter) && !isFCDPointerNull(coleiter)) {
+ if (FALSE && !forwards && inNormBuf(coleiter) && !isFCDPointerNull(coleiter)) {
result ++;
}
return result;
@@ -1062,7 +1207,7 @@
{
UCollationElements *coleiter = strsrch->textIter;
int32_t textlength = strsrch->search->textLength;
- int32_t temp = *start;
+ int32_t temp = *start;
const UCollator *collator = strsrch->collator;
const UChar *text = strsrch->search->text;
// This part checks if either ends of the match contains potential
@@ -2482,6 +2627,7 @@
strsrch->search->matchedLength = end - *textoffset;
return TRUE;
}
+#endif // #if BOYER_MOORE
// constructors and destructor -------------------------------------------
@@ -2604,6 +2750,7 @@
result->pattern.text = pattern;
result->pattern.textLength = patternlength;
result->pattern.CE = NULL;
+ result->pattern.PCE = NULL;
result->search->breakIter = breakiter;
#if !UCONFIG_NO_BREAK_ITERATION
@@ -2648,14 +2795,23 @@
strsrch->pattern.CE) {
uprv_free(strsrch->pattern.CE);
}
+
+ if (strsrch->pattern.PCE != NULL &&
+ strsrch->pattern.PCE != strsrch->pattern.PCEBuffer) {
+ uprv_free(strsrch->pattern.PCE);
+ }
+
ucol_closeElements(strsrch->textIter);
ucol_closeElements(strsrch->utilIter);
+
if (strsrch->ownCollator && strsrch->collator) {
ucol_close((UCollator *)strsrch->collator);
}
+
if (strsrch->search->internalBreakIter) {
ubrk_close(strsrch->search->internalBreakIter);
}
+
uprv_free(strsrch->search);
uprv_free(strsrch);
}
@@ -2863,6 +3019,7 @@
*status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
+
if (strsrch) {
if (strsrch->ownCollator && (strsrch->collator != collator)) {
ucol_close((UCollator *)strsrch->collator);
@@ -2892,6 +3049,14 @@
}
}
}
+
+ // **** are these calls needed?
+ // **** we call uprv_init_pce in initializePatternPCETable
+ // **** and the CEBuffer constructor...
+#if 0
+ uprv_init_pce(strsrch->textIter);
+ uprv_init_pce(strsrch->utilIter);
+#endif
}
}
@@ -3029,6 +3194,7 @@
search->reset = FALSE;
int32_t textlength = search->textLength;
if (search->isForwardSearching) {
+#if BOYER_MOORE
if (offset == textlength
|| (!search->isOverlap &&
(offset + strsrch->pattern.defaultShiftSize > textlength ||
@@ -3038,6 +3204,16 @@
setMatchNotFound(strsrch);
return USEARCH_DONE;
}
+#else
+ if (offset == textlength ||
+ (! search->isOverlap &&
+ (search->matchedIndex != USEARCH_DONE &&
+ offset + search->matchedLength > textlength))) {
+ // not enough characters to match
+ setMatchNotFound(strsrch);
+ return USEARCH_DONE;
+ }
+#endif
}
else {
// switching direction.
@@ -3101,6 +3277,14 @@
return USEARCH_DONE;
}
+#if !BOYER_MOORE
+ if (search->matchedIndex == USEARCH_DONE) {
+ ucol_setOffset(strsrch->textIter, search->textLength, status);
+ } else {
+ ucol_setOffset(strsrch->textIter, search->matchedIndex, status);
+ }
+#endif
+
return search->matchedIndex;
}
}
@@ -3136,6 +3320,7 @@
}
}
else {
+#if BOYER_MOORE
if (offset == 0 || matchedindex == 0 ||
(!search->isOverlap &&
(offset < strsrch->pattern.defaultShiftSize ||
@@ -3145,6 +3330,14 @@
setMatchNotFound(strsrch);
return USEARCH_DONE;
}
+#else
+ // Could check pattern length, but the
+ // linear search will do the right thing
+ if (offset == 0 || matchedindex == 0) {
+ setMatchNotFound(strsrch);
+ return USEARCH_DONE;
+ }
+#endif
}
if (U_SUCCESS(*status)) {
@@ -3163,6 +3356,14 @@
}
}
else {
+#if !BOYER_MOORE
+ if (search->matchedIndex != USEARCH_DONE) {
+ if (search->isOverlap) {
+ ucol_setOffset(strsrch->textIter, search->matchedIndex + search->matchedLength - 2, status);
+ }
+ }
+#endif
+
if (strsrch->search->isCanonicalMatch) {
// can't use exact here since extra accents are allowed.
usearch_handlePreviousCanonical(strsrch, status);
@@ -3235,6 +3436,695 @@
}
}
+//
+// CEI Collation Element + source text index.
+// These structs are kept in the circular buffer.
+//
+struct CEI {
+ int64_t ce;
+ int32_t lowIndex;
+ int32_t highIndex;
+};
+
+//
+// CEBuffer A circular buffer of CEs from the text being searched.
+//
+#define DEFAULT_CEBUFFER_SIZE 50
+struct CEBuffer {
+ CEI defBuf[DEFAULT_CEBUFFER_SIZE];
+ CEI *buf;
+ int32_t bufSize;
+ int32_t firstIx;
+ int32_t limitIx;
+ UCollationElements *ceIter;
+ UStringSearch *strSearch;
+
+
+
+ CEBuffer(UStringSearch *ss, UErrorCode *status);
+ ~CEBuffer();
+ const CEI *get(int32_t index);
+ const CEI *getPrevious(int32_t index);
+};
+
+
+CEBuffer::CEBuffer(UStringSearch *ss, UErrorCode *status) {
+ buf = defBuf;
+ strSearch = ss;
+ bufSize = ss->pattern.CELength+10;
+ ceIter = ss->textIter;
+ firstIx = 0;
+ limitIx = 0;
+
+ uprv_init_pce(ceIter);
+
+ if (bufSize>DEFAULT_CEBUFFER_SIZE) {
+ buf = (CEI *)uprv_malloc(bufSize * sizeof(CEI));
+ if (buf == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ }
+}
+
+// TODO: add a reset or init function so that allocated
+// buffers can be retained & reused.
+
+CEBuffer::~CEBuffer() {
+ if (buf != defBuf) {
+ uprv_free(buf);
+ }
+}
+
+
+// Get the CE with the specified index.
+// Index must be in the range
+// n-history_size < index < n+1
+// where n is the largest index to have been fetched by some previous call to this function.
+// The CE value will be UCOL_PROCESSED_NULLORDER at end of input.
+//
+const CEI *CEBuffer::get(int32_t index) {
+ int i = index % bufSize;
+
+ if (index>=firstIx && index<limitIx) {
+ // The request was for an entry already in our buffer.
+ // Just return it.
+ return &buf[i];
+ }
+
+ // Caller is requesting a new, never accessed before, CE.
+ // Verify that it is the next one in sequence, which is all
+ // that is allowed.
+ if (index != limitIx) {
+ U_ASSERT(FALSE);
+
+ return NULL;
+ }
+
+ // Manage the circular CE buffer indexing
+ limitIx++;
+
+ if (limitIx - firstIx >= bufSize) {
+ // The buffer is full, knock out the lowest-indexed entry.
+ firstIx++;
+ }
+
+ UErrorCode status = U_ZERO_ERROR;
+
+ buf[i].ce = ucol_nextProcessed(ceIter, &buf[i].lowIndex, &buf[i].highIndex, &status);
+
+ return &buf[i];
+}
+
+// Get the CE with the specified index, iterating backwards through the text.
+// Index must be in the range
+// n-history_size < index < n+1
+// where n is the largest index to have been fetched by some previous call to this function.
+// The CE value will be UCOL_PROCESSED_NULLORDER when the iterator runs out of input.
+//
+const CEI *CEBuffer::getPrevious(int32_t index) {
+ int i = index % bufSize;
+
+ if (index>=firstIx && index<limitIx) {
+ // The request was for an entry already in our buffer.
+ // Just return it.
+ return &buf[i];
+ }
+
+ // Caller is requesting a new, never accessed before, CE.
+ // Verify that it is the next one in sequence, which is all
+ // that is allowed.
+ if (index != limitIx) {
+ U_ASSERT(FALSE);
+
+ return NULL;
+ }
+
+ // Manage the circular CE buffer indexing
+ limitIx++;
+
+ if (limitIx - firstIx >= bufSize) {
+ // The buffer is full, knock out the lowest-indexed entry.
+ firstIx++;
+ }
+
+ UErrorCode status = U_ZERO_ERROR;
+
+ buf[i].ce = ucol_previousProcessed(ceIter, &buf[i].lowIndex, &buf[i].highIndex, &status);
+
+ return &buf[i];
+}
+
+// #define USEARCH_DEBUG
+
+#ifdef USEARCH_DEBUG
+#include <stdio.h>
+#include <stdlib.h>
+#endif
+
+/*
+ * Find the next break boundary after startIndex. If the UStringSearch object
+ * has an external break iterator, use that. Otherwise use the internal character
+ * break iterator.
+ */
+static int32_t nextBoundaryAfter(UStringSearch *strsrch, int32_t startIndex) {
+#if 0
+ const UChar *text = strsrch->search->text;
+ int32_t textLen = strsrch->search->textLength;
+
+ U_ASSERT(startIndex>=0);
+ U_ASSERT(startIndex<=textLen);
+
+ if (startIndex >= textLen) {
+ return startIndex;
+ }
+
+ UChar32 c;
+ int32_t i = startIndex;
+ U16_NEXT(text, i, textLen, c);
+
+ // If we are on a control character, stop without looking for combining marks.
+ // Control characters do not combine.
+ int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
+ if (gcProperty==U_GCB_CONTROL || gcProperty==U_GCB_LF || gcProperty==U_GCB_CR) {
+ return i;
+ }
+
+ // The initial character was not a control, and can thus accept trailing
+ // combining characters. Advance over however many of them there are.
+ int32_t indexOfLastCharChecked;
+ for (;;) {
+ indexOfLastCharChecked = i;
+ if (i>=textLen) {
+ break;
+ }
+ U16_NEXT(text, i, textLen, c);
+ gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
+ if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {
+ break;
+ }
+ }
+ return indexOfLastCharChecked;
+#elif !UCONFIG_NO_BREAK_ITERATION
+ UBreakIterator *breakiterator = strsrch->search->breakIter;
+
+ if (breakiterator == NULL) {
+ breakiterator = strsrch->search->internalBreakIter;
+ }
+
+ if (breakiterator != NULL) {
+ return ubrk_following(breakiterator, startIndex);
+ }
+
+ return startIndex;
+#else
+ // **** or should we use the original code? ****
+ return startIndex;
+#endif
+
+}
+
+/*
+ * Returns TRUE if index is NOT on a break boundary (despite the name). If the
+ * UStringSearch has an external break iterator, test using that, otherwise
+ * test using the internal character break iterator. Returns FALSE if neither exists.
+ */
+static UBool isBreakBoundary(UStringSearch *strsrch, int32_t index) {
+#if 0
+ const UChar *text = strsrch->search->text;
+ int32_t textLen = strsrch->search->textLength;
+
+ U_ASSERT(index>=0);
+ U_ASSERT(index<=textLen);
+
+ if (index>=textLen || index<=0) {
+ return FALSE;
+ }
+
+ // If the character at the current index is not a GRAPHEME_EXTEND
+ // then we can not be within a combining sequence.
+ UChar32 c;
+ U16_GET(text, 0, index, textLen, c);
+ int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
+ if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {
+ return FALSE;
+ }
+
+ // We are at a combining mark. If the preceding character is anything
+ // except a CONTROL, CR or LF, we are in a combining sequence.
+ U16_PREV(text, 0, index, c);
+ gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
+ UBool combining = !(gcProperty==U_GCB_CONTROL || gcProperty==U_GCB_LF || gcProperty==U_GCB_CR);
+ return combining;
+#elif !UCONFIG_NO_BREAK_ITERATION
+ UBreakIterator *breakiterator = strsrch->search->breakIter;
+
+ if (breakiterator == NULL) {
+ breakiterator = strsrch->search->internalBreakIter;
+ }
+
+ return (breakiterator != NULL && ! ubrk_isBoundary(breakiterator, index));
+#else
+ // **** or use the original code? ****
+ return FALSE;
+#endif
+}
+
+#if 0
+static UBool onBreakBoundaries(const UStringSearch *strsrch, int32_t start, int32_t end)
+{
+#if !UCONFIG_NO_BREAK_ITERATION
+ UBreakIterator *breakiterator = strsrch->search->breakIter;
+
+ if (breakiterator != NULL) {
+ int32_t startindex = ubrk_first(breakiterator);
+ int32_t endindex = ubrk_last(breakiterator);
+
+ // out-of-range indexes are never boundary positions
+ if (start < startindex || start > endindex ||
+ end < startindex || end > endindex) {
+ return FALSE;
+ }
+
+ return ubrk_isBoundary(breakiterator, start) &&
+ ubrk_isBoundary(breakiterator, end);
+ }
+#endif
+
+ return TRUE;
+}
+#endif
+
+
+U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
+ int32_t startIdx,
+ int32_t *matchStart,
+ int32_t *matchLimit,
+ UErrorCode *status)
+{
+ if (U_FAILURE(*status)) {
+ return FALSE;
+ }
+
+ // TODO: reject search patterns beginning with a combining char.
+
+#ifdef USEARCH_DEBUG
+ if (getenv("USEARCH_DEBUG") != NULL) {
+ printf("Pattern CEs\n");
+ for (int ii=0; ii<strsrch->pattern.CELength; ii++) {
+ printf(" %8x", strsrch->pattern.CE[ii]);
+ }
+ printf("\n");
+ }
+
+#endif
+ // Input parameter sanity check.
+ // TODO: should input indicies clip to the text length
+ // in the same way that UText does.
+ if(strsrch->pattern.CELength == 0 ||
+ startIdx < 0 ||
+ startIdx > strsrch->search->textLength ||
+ strsrch->pattern.CE == NULL) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return FALSE;
+ }
+
+ if (strsrch->pattern.PCE == NULL) {
+ initializePatternPCETable(strsrch, status);
+ }
+
+ ucol_setOffset(strsrch->textIter, startIdx, status);
+ CEBuffer ceb(strsrch, status);
+
+
+ int32_t targetIx = 0;
+ const CEI *targetCEI;
+ int32_t patIx;
+ UBool found;
+
+ int32_t mStart = -1;
+ int32_t mLimit = -1;
+ int32_t minLimit;
+ int32_t maxLimit;
+
+
+
+ // Outer loop moves over match starting positions in the
+ // target CE space.
+ for(targetIx=0; ; targetIx++)
+ {
+ found = TRUE;
+ // Inner loop checks for a match beginning at each
+ // position from the outer loop.
+ for (patIx=0; patIx<strsrch->pattern.CELength; patIx++) {
+ int64_t patCE = strsrch->pattern.PCE[patIx];
+ targetCEI = ceb.get(targetIx+patIx);
+ // Compare CE from target string with CE from the pattern.
+ // Note that the target CE will be UCOL_PROCESSED_NULLORDER if we reach the end of input,
+ // which will fail the compare, below.
+ if (targetCEI->ce != patCE) {
+ found = FALSE;
+ break;
+ }
+ }
+
+ if (!found && targetCEI->ce != UCOL_PROCESSED_NULLORDER) {
+ // No match at this targetIx. Try again at the next.
+ continue;
+ }
+
+ if (!found) {
+ // No match at all, we have run off the end of the target text.
+ break;
+ }
+
+
+ // We have found a match in CE space.
+ // Now determine the bounds in string index space.
+ // There still is a chance of match failure if the CE range does not correspond to
+ // an acceptable character range.
+ //
+ const CEI *firstCEI = ceb.get(targetIx);
+ const CEI *lastCEI = ceb.get(targetIx + strsrch->pattern.CELength - 1);
+ const CEI *nextCEI = ceb.get(targetIx + strsrch->pattern.CELength);
+
+ // targetCEI = ceb.get(targetIx+strsrch->pattern.CELength);
+ // maxLimit = targetCEI->lowIndex;
+ mStart = firstCEI->lowIndex;
+ minLimit = lastCEI->lowIndex;
+ maxLimit = nextCEI->lowIndex;
+
+ // Look at the CE following the match. If it is UCOL_NULLORDER the match
+ // extended to the end of input, and the match is good.
+
+ // Look at the high and low indices of the CE following the match. If
+ // they are the same it means one of two things:
+ // 1. The match extended to the last CE from the target text, which is OK, or
+ // 2. The last CE that was part of the match is in an expansion that extends
+ // to the first CE after the match. In this case, we reject the match.
+ if (nextCEI->lowIndex == nextCEI->highIndex && nextCEI->ce != UCOL_PROCESSED_NULLORDER) {
+ found = FALSE;
+ }
+
+
+ // Check for the start of the match being within a combining sequence.
+ // This can happen if the pattern itself begins with a combining char, and
+ // the match found combining marks in the target text that were attached
+ // to something else.
+ // This type of match should be rejected for not completely consuming a
+ // combining sequence.
+ if (isBreakBoundary(strsrch, mStart)) {
+ found = FALSE;
+ }
+
+ // Check for the start of the match being within a Collation Element Expansion,
+ // meaning that the first char of the match is only partially matched.
+ // With expansions, the first CE will report the index of the source
+ // character, and all subsequent (expansion) CEs will report the source index of the
+ // _following_ character.
+ int32_t secondIx = firstCEI->highIndex;
+ if (mStart == secondIx) {
+ found = FALSE;
+ }
+
+ // Advance the match end position to the first acceptable match boundary.
+ // This advances the index over any combining characters.
+ mLimit = maxLimit;
+ if (minLimit < maxLimit) {
+ int32_t nba = nextBoundaryAfter(strsrch, minLimit);
+
+ if (nba >= lastCEI->highIndex) {
+ mLimit = nba;
+ }
+ }
+
+ #ifdef USEARCH_DEBUG
+ if (getenv("USEARCH_DEBUG") != NULL) {
+ printf("minLimit, maxLimit, mLimit = %d, %d, %d\n", minLimit, maxLimit, mLimit);
+ }
+ #endif
+
+ // If advancing to the end of a combining sequence in character indexing space
+ // advanced us beyond the end of the match in CE space, reject this match.
+ if (mLimit > maxLimit) {
+ found = FALSE;
+ }
+
+ if (isBreakBoundary(strsrch, mLimit)) {
+ found = FALSE;
+ }
+
+ if (found) {
+ break;
+ }
+ }
+
+ #ifdef USEARCH_DEBUG
+ if (getenv("USEARCH_DEBUG") != NULL) {
+ printf("Target CEs [%d .. %d]\n", ceb.firstIx, ceb.limitIx);
+ int32_t lastToPrint = ceb.limitIx+2;
+ for (int ii=ceb.firstIx; ii<lastToPrint; ii++) {
+ printf("%8x@%d ", ceb.get(ii)->ce, ceb.get(ii)->srcIndex);
+ }
+ printf("\n%s\n", found? "match found" : "no match");
+ }
+ #endif
+
+ // All Done. Store back the match bounds to the caller.
+ //
+ if (found==FALSE) {
+ mLimit = -1;
+ mStart = -1;
+ }
+
+ if (matchStart != NULL) {
+ *matchStart= mStart;
+ }
+
+ if (matchLimit != NULL) {
+ *matchLimit = mLimit;
+ }
+
+ return found;
+}
+
+
+U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
+ int32_t startIdx,
+ int32_t *matchStart,
+ int32_t *matchLimit,
+ UErrorCode *status)
+{
+ if (U_FAILURE(*status)) {
+ return FALSE;
+ }
+
+ // TODO: reject search patterns beginning with a combining char.
+
+#ifdef USEARCH_DEBUG
+ if (getenv("USEARCH_DEBUG") != NULL) {
+ printf("Pattern CEs\n");
+ for (int ii=0; ii<strsrch->pattern.CELength; ii++) {
+ printf(" %8x", strsrch->pattern.CE[ii]);
+ }
+ printf("\n");
+ }
+
+#endif
+ // Input parameter sanity check.
+ // TODO: should input indicies clip to the text length
+ // in the same way that UText does.
+ if(strsrch->pattern.CELength == 0 ||
+ startIdx < 0 ||
+ startIdx > strsrch->search->textLength ||
+ strsrch->pattern.CE == NULL) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return FALSE;
+ }
+
+ if (strsrch->pattern.PCE == NULL) {
+ initializePatternPCETable(strsrch, status);
+ }
+
+ CEBuffer ceb(strsrch, status);
+ int32_t targetIx = 0;
+
+ /*
+ * Pre-load the buffer with the CE's for the grapheme
+ * after our starting position so that we're sure that
+ * we can look at the CE following the match when we
+ * check the match boundaries.
+ *
+ * This will also pre-fetch the first CE that we'll
+ * consider for the match.
+ */
+ if (startIdx < strsrch->search->textLength) {
+ UBreakIterator *bi = strsrch->search->internalBreakIter;
+ int32_t next = ubrk_following(bi, startIdx);
+
+ ucol_setOffset(strsrch->textIter, next, status);
+
+ for (targetIx = 0; ; targetIx += 1) {
+ if (ceb.getPrevious(targetIx)->lowIndex < startIdx) {
+ break;
+ }
+ }
+ } else {
+ ucol_setOffset(strsrch->textIter, startIdx, status);
+ }
+
+
+ const CEI *targetCEI;
+ int32_t patIx;
+ UBool found;
+
+ int32_t limitIx = targetIx;
+ int32_t mStart = -1;
+ int32_t mLimit = -1;
+ int32_t minLimit;
+ int32_t maxLimit;
+
+
+
+ // Outer loop moves over match starting positions in the
+ // target CE space.
+ for(targetIx = limitIx; ; targetIx += 1)
+ {
+ found = TRUE;
+ // Inner loop checks for a match beginning at each
+ // position from the outer loop.
+ for (patIx = strsrch->pattern.CELength - 1; patIx >= 0; patIx -= 1) {
+ int64_t patCE = strsrch->pattern.PCE[patIx];
+
+ targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.CELength - 1 - patIx);
+ // Compare CE from target string with CE from the pattern.
+ // Note that the target CE will be UCOL_PROCESSED_NULLORDER if we reach the end of input,
+ // which will fail the compare, below.
+ if (targetCEI->ce != patCE) {
+ found = FALSE;
+ break;
+ }
+ }
+
+ if (!found && targetCEI->ce != UCOL_PROCESSED_NULLORDER) {
+ // No match at this targetIx. Try again at the next.
+ continue;
+ }
+
+ if (!found) {
+ // No match at all, we have run off the end of the target text.
+ break;
+ }
+
+
+ // We have found a match in CE space.
+ // Now determine the bounds in string index space.
+ // There still is a chance of match failure if the CE range does not correspond to
+ // an acceptable character range.
+ //
+ const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.CELength - 1);
+ const CEI *lastCEI = ceb.getPrevious(targetIx);
+ const CEI *nextCEI = targetIx > 0? ceb.getPrevious(targetIx - 1) : NULL;
+
+ mStart = firstCEI->lowIndex;
+ minLimit = lastCEI->lowIndex;
+ maxLimit = targetIx > 0? nextCEI->lowIndex : lastCEI->highIndex;
+
+ // Look at the CE following the match. If it is UCOL_NULLORDER the match
+ // extended to the end of input, and the match is good.
+
+ // Look at the high and low indices of the CE following the match. If
+ // they are the same it means one of two things:
+ // 1. The match extended to the last CE from the target text, which is OK, or
+ // 2. The last CE that was part of the match is in an expansion that extends
+ // to the first CE after the match. In this case, we reject the match.
+ if (targetIx >= 1) {
+ if (nextCEI->lowIndex == nextCEI->highIndex && nextCEI->ce != UCOL_PROCESSED_NULLORDER) {
+ found = FALSE;
+ }
+ }
+
+
+ // Check for the start of the match being within a combining sequence.
+ // This can happen if the pattern itself begins with a combining char, and
+ // the match found combining marks in the target text that were attached
+ // to something else.
+ // This type of match should be rejected for not completely consuming a
+ // combining sequence.
+ if (isBreakBoundary(strsrch, mStart)) {
+ found = FALSE;
+ }
+
+ // Look at the high index of the first CE in the match. If it's the same as the
+ // low index, the first CE in the match is in the middle of an expansion.
+ if (mStart == firstCEI->highIndex) {
+ found = FALSE;
+ }
+
+ // Advance the match end position to the first acceptable match boundary.
+ // This advances the index over any combining characters.
+ mLimit = maxLimit;
+ if (/*targetIx > 0 &&*/ minLimit < maxLimit) {
+ int32_t nba = nextBoundaryAfter(strsrch, minLimit);
+
+ if (nba >= lastCEI->highIndex) {
+ mLimit = nba;
+ }
+ }
+
+ #ifdef USEARCH_DEBUG
+ if (getenv("USEARCH_DEBUG") != NULL) {
+ printf("minLimit, maxLimit, mLimit = %d, %d, %d\n", minLimit, maxLimit, mLimit);
+ }
+ #endif
+
+ // If advancing to the end of a combining sequence in character indexing space
+ // advanced us beyond the end of the match in CE space, reject this match.
+ if (mLimit > maxLimit) {
+ found = FALSE;
+ }
+
+ // Make sure the end of the match is on a break boundary
+ if (isBreakBoundary(strsrch, mLimit)) {
+ found = FALSE;
+ }
+
+ if (found) {
+ break;
+ }
+ }
+
+ #ifdef USEARCH_DEBUG
+ if (getenv("USEARCH_DEBUG") != NULL) {
+ printf("Target CEs [%d .. %d]\n", ceb.firstIx, ceb.limitIx);
+ int32_t lastToPrint = ceb.limitIx+2;
+ for (int ii=ceb.firstIx; ii<lastToPrint; ii++) {
+ printf("%8x@%d ", ceb.get(ii)->ce, ceb.get(ii)->srcIndex);
+ }
+ printf("\n%s\n", found? "match found" : "no match");
+ }
+ #endif
+
+ // All Done. Store back the match bounds to the caller.
+ //
+ if (found==FALSE) {
+ mLimit = -1;
+ mStart = -1;
+ }
+
+ if (matchStart != NULL) {
+ *matchStart= mStart;
+ }
+
+ if (matchLimit != NULL) {
+ *matchLimit = mLimit;
+ }
+
+ return found;
+}
+
+
+
+
// internal use methods declared in usrchimp.h -----------------------------
UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status)
@@ -3244,6 +4134,7 @@
return FALSE;
}
+#if BOYER_MOORE
UCollationElements *coleiter = strsrch->textIter;
int32_t textlength = strsrch->search->textLength;
int32_t *patternce = strsrch->pattern.CE;
@@ -3332,6 +4223,20 @@
}
setMatchNotFound(strsrch);
return FALSE;
+#else
+ int32_t textOffset = ucol_getOffset(strsrch->textIter);
+ int32_t start = -1;
+ int32_t end = -1;
+
+ if (usearch_search(strsrch, textOffset, &start, &end, status)) {
+ strsrch->search->matchedIndex = start;
+ strsrch->search->matchedLength = end - start;
+ return TRUE;
+ } else {
+ setMatchNotFound(strsrch);
+ return FALSE;
+ }
+#endif
}
UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
@@ -3341,6 +4246,7 @@
return FALSE;
}
+#if BOYER_MOORE
UCollationElements *coleiter = strsrch->textIter;
int32_t textlength = strsrch->search->textLength;
int32_t *patternce = strsrch->pattern.CE;
@@ -3430,6 +4336,20 @@
}
setMatchNotFound(strsrch);
return FALSE;
+#else
+ int32_t textOffset = ucol_getOffset(strsrch->textIter);
+ int32_t start = -1;
+ int32_t end = -1;
+
+ if (usearch_search(strsrch, textOffset, &start, &end, status)) {
+ strsrch->search->matchedIndex = start;
+ strsrch->search->matchedLength = end - start;
+ return TRUE;
+ } else {
+ setMatchNotFound(strsrch);
+ return FALSE;
+ }
+#endif
}
UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
@@ -3439,6 +4359,7 @@
return FALSE;
}
+#if BOYER_MOORE
UCollationElements *coleiter = strsrch->textIter;
int32_t *patternce = strsrch->pattern.CE;
int32_t patterncelength = strsrch->pattern.CELength;
@@ -3530,6 +4451,20 @@
}
setMatchNotFound(strsrch);
return FALSE;
+#else
+ int32_t textOffset = ucol_getOffset(strsrch->textIter);
+ int32_t start = -1;
+ int32_t end = -1;
+
+ if (usearch_searchBackwards(strsrch, textOffset, &start, &end, status)) {
+ strsrch->search->matchedIndex = start;
+ strsrch->search->matchedLength = end - start;
+ return TRUE;
+ } else {
+ setMatchNotFound(strsrch);
+ return FALSE;
+ }
+#endif
}
UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
@@ -3540,6 +4475,7 @@
return FALSE;
}
+#if BOYER_MOORE
UCollationElements *coleiter = strsrch->textIter;
int32_t *patternce = strsrch->pattern.CE;
int32_t patterncelength = strsrch->pattern.CELength;
@@ -3638,6 +4574,20 @@
}
setMatchNotFound(strsrch);
return FALSE;
+#else
+ int32_t textOffset = ucol_getOffset(strsrch->textIter);
+ int32_t start = -1;
+ int32_t end = -1;
+
+ if (usearch_searchBackwards(strsrch, textOffset, &start, &end, status)) {
+ strsrch->search->matchedIndex = start;
+ strsrch->search->matchedLength = end - start;
+ return TRUE;
+ } else {
+ setMatchNotFound(strsrch);
+ return FALSE;
+ }
+#endif
}
#endif /* #if !UCONFIG_NO_COLLATION */
Modified: trunk/source/i18n/usrchimp.h
===================================================================
--- trunk/source/i18n/usrchimp.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/i18n/usrchimp.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (C) 2001-2007 IBM and others. All rights reserved.
+* Copyright (C) 2001-2008 IBM and others. All rights reserved.
**********************************************************************
* Date Name Description
* 08/13/2001 synwee Creation.
@@ -31,8 +31,8 @@
// value USEARCH_DONE is the default value
// if we are not at the start of the text or the end of the text,
// depending on the iteration direction and matchedIndex is USEARCH_DONE
- // it means that we can find any more matches in that particular direction
- int32_t matchedIndex;
+ // it means that we can't find any more matches in that particular direction
+ int32_t matchedIndex;
int32_t matchedLength;
UBool isForwardSearching;
UBool reset;
@@ -45,6 +45,9 @@
int32_t CELength;
int32_t *CE;
int32_t CEBuffer[INITIAL_ARRAY_SIZE_];
+ int32_t PCELength;
+ int64_t *PCE;
+ int64_t PCEBuffer[INITIAL_ARRAY_SIZE_];
UBool hasPrefixAccents;
UBool hasSuffixAccents;
int16_t defaultShiftSize;
Modified: trunk/source/icudefs.mk.in
===================================================================
--- trunk/source/icudefs.mk.in 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/icudefs.mk.in 2008-05-31 14:31:31 UTC (rev 153)
@@ -7,6 +7,8 @@
#
# Some of these variables are overridden in the config/mh-* files.
+#
+# Please be sure to update config/Makefile.inc.in if you add something here.
#
# Shell to use
Modified: trunk/source/layout/IndicClassTables.cpp
===================================================================
--- trunk/source/layout/IndicClassTables.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/layout/IndicClassTables.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -48,6 +48,7 @@
#define _m2 (CC_SPLIT_VOWEL_PIECE_2 | CF_LENGTH_MARK)
#define _m3 (CC_SPLIT_VOWEL_PIECE_3 | CF_LENGTH_MARK)
#define _vr (CC_VIRAMA)
+#define _al (CC_AL_LAKUNA)
// split matras
#define _s1 (_dv | _x1)
@@ -206,7 +207,7 @@
_iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _xx, _xx, _ct, _ct, _ct, _ct, _ct, _ct, // 0D90 - 0D9F
_ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, // 0DA0 - 0DAF
_ct, _ct, _xx, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _ct, _xx, _xx, // 0DB0 - 0DBF
- _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _xx, _xx, _vr, _xx, _xx, _xx, _xx, _dr, // 0DC0 - 0DCF
+ _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _xx, _xx, _al, _xx, _xx, _xx, _xx, _dr, // 0DC0 - 0DCF
_dr, _dr, _da, _da, _db, _xx, _db, _xx, _dr, _dl, _s1, _dl, _s2, _s3, _s4, _dr, // 0DD0 - 0DDF
_xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0DE0 - 0DEF
_xx, _xx, _dr, _dr, _xx // 0DF0 - 0DF4
@@ -229,7 +230,7 @@
static const SplitMatra mlymSplitTable[] = {{0x0D46, 0x0D3E}, {0x0D47, 0x0D3E}, {0x0D46, 0x0D57}};
-static const SplitMatra sinhSplitTable[] = {{0x0DD9, 0x0DCA}, {0x0DD9, 0x0DCF}, {0x0DD9, 0x0DCF,0x0DCA},
+static const SplitMatra sinhSplitTable[] = {{0x0DD9, 0x0DCA}, {0x0DD9, 0x0DCF}, {0x0DD9, 0x0DCF, 0x0DCA},
{0x0DD9, 0x0DDF}};
//
// Script Flags
@@ -248,7 +249,7 @@
#define TELU_SCRIPT_FLAGS (SF_MATRAS_AFTER_BASE | SF_FILTER_ZERO_WIDTH | 3)
#define KNDA_SCRIPT_FLAGS (SF_MATRAS_AFTER_BASE | SF_FILTER_ZERO_WIDTH | 3)
#define MLYM_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT /*| SF_FILTER_ZERO_WIDTH*/)
-#define SINH_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT)
+#define SINH_SCRIPT_FLAGS (SF_NO_POST_BASE_LIMIT)
//
// Indic Class Tables
Modified: trunk/source/layout/IndicReordering.cpp
===================================================================
--- trunk/source/layout/IndicReordering.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/layout/IndicReordering.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -73,8 +73,8 @@
LEUnicode fLengthMark;
le_int32 fLengthMarkIndex;
- LEUnicode fVirama;
- le_int32 fViramaIndex;
+ LEUnicode fAlLakuna;
+ le_int32 fAlLakunaIndex;
FeatureMask fMatraFeatures;
@@ -97,9 +97,9 @@
if (IndicClassTable::isLengthMark(matraClass)) {
fLengthMark = matra;
fLengthMarkIndex = matraIndex;
- } else if (IndicClassTable::isVirama(matraClass)) {
- fVirama = matra;
- fViramaIndex = matraIndex;
+ } else if (IndicClassTable::isAlLakuna(matraClass)) {
+ fAlLakuna = matra;
+ fAlLakunaIndex = matraIndex;
} else {
switch (matraClass & CF_POS_MASK) {
case CF_POS_BEFORE:
@@ -133,7 +133,7 @@
IndicReorderingOutput(LEUnicode *outChars, LEGlyphStorage &glyphStorage, MPreFixups *mpreFixups)
: fSyllableCount(0), fOutIndex(0), fOutChars(outChars), fGlyphStorage(glyphStorage),
fMpre(0), fMpreIndex(0), fMbelow(0), fMbelowIndex(0), fMabove(0), fMaboveIndex(0),
- fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fVirama(0), fViramaIndex(0),
+ fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fAlLakuna(0), fAlLakunaIndex(0),
fMatraFeatures(0), fMPreOutIndex(-1), fMPreFixups(mpreFixups),
fVMabove(0), fVMpost(0), fVMIndex(0), fVMFeatures(0),
fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0)
@@ -150,7 +150,7 @@
{
fSyllableCount += 1;
- fMpre = fMbelow = fMabove = fMpost = fLengthMark = fVirama = 0;
+ fMpre = fMbelow = fMabove = fMpost = fLengthMark = fAlLakuna = 0;
fMPreOutIndex = -1;
fVMabove = fVMpost = 0;
@@ -255,11 +255,11 @@
}
}
- // Handles virama in Sinhala split vowels.
- void writeVirama()
+ // Handles Al-Lakuna in Sinhala split vowels.
+ void writeAlLakuna()
{
- if (fVirama != 0) {
- writeChar(fVirama, fViramaIndex, fMatraFeatures);
+ if (fAlLakuna != 0) {
+ writeChar(fAlLakuna, fAlLakunaIndex, fMatraFeatures);
}
}
@@ -371,20 +371,21 @@
static const le_int8 stateTable[][CC_COUNT] =
{
-// xx vm sm iv i2 i3 ct cn nu dv s1 s2 s3 vr zw
- { 1, 6, 1, 5, 8, 11, 3, 2, 1, 5, 9, 5, 5, 1, 1}, // 0 - ground state
- {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state
- {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, 12}, // 2 - consonant with nukta
- {-1, 6, 1, -1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, 12}, // 3 - consonant
- {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7}, // 4 - consonant virama
- {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels
- {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark
- {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1}, // 7 - consonant virama ZWJ, consonant ZWJ virama
- {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1}, // 8 - independent vowels that can take a virama
- {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1}, // 9 - first part of split vowel
- {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1}, // 10 - second part of split vowel
- {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1}, // 11 - independent vowels that can take an iv
- {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1} // 12 - consonant ZWJ (TODO: Take everything else that can be after a consonant?)
+// xx vm sm iv i2 i3 ct cn nu dv s1 s2 s3 vr zw al
+ { 1, 6, 1, 5, 8, 11, 3, 2, 1, 5, 9, 5, 5, 1, 1, 1}, // 0 - ground state
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state
+ {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, 12, -1}, // 2 - consonant with nukta
+ {-1, 6, 1, -1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, 12, 13}, // 3 - consonant
+ {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7, -1}, // 4 - consonant virama
+ {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels
+ {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark
+ {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1, -1}, // 7 - consonant virama ZWJ, consonant ZWJ virama
+ {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1}, // 8 - independent vowels that can take a virama
+ {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1, -1}, // 9 - first part of split vowel
+ {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1, -1}, // 10 - second part of split vowel
+ {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1, -1}, // 11 - independent vowels that can take an iv
+ {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1, 7}, // 12 - consonant ZWJ (TODO: Take everything else that can be after a consonant?)
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1} // 13 - consonant al-lakuna ZWJ consonant
};
@@ -511,7 +512,7 @@
}
output.writeLengthMark();
- output.writeVirama();
+ output.writeAlLakuna();
if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) {
output.writeVMabove();
@@ -643,7 +644,8 @@
bcSpan += 1;
}
- if (baseConsonant == lastConsonant && bcSpan < markStart && classTable->isVirama(chars[bcSpan])) {
+ if (baseConsonant == lastConsonant && bcSpan < markStart &&
+ (classTable->isVirama(chars[bcSpan]) || classTable->isAlLakuna(chars[bcSpan]))) {
bcSpan += 1;
if (bcSpan < markStart && chars[bcSpan] == C_SIGN_ZWNJ) {
@@ -719,7 +721,7 @@
}
output.writeLengthMark();
- output.writeVirama();
+ output.writeAlLakuna();
// write reph
if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) {
Modified: trunk/source/layout/IndicReordering.h
===================================================================
--- trunk/source/layout/IndicReordering.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/layout/IndicReordering.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
/*
*
- * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
+ * (C) Copyright IBM Corp. 1998-2008 - All Rights Reserved
*
*/
@@ -37,7 +37,8 @@
#define CC_SPLIT_VOWEL_PIECE_3 12U
#define CC_VIRAMA 13U
#define CC_ZERO_WIDTH_MARK 14U
-#define CC_COUNT 15U
+#define CC_AL_LAKUNA 15U
+#define CC_COUNT 16U
// Character class flags
#define CF_CLASS_MASK 0x0000FFFFU
@@ -98,6 +99,7 @@
inline le_bool isConsonant(LEUnicode ch) const;
inline le_bool isReph(LEUnicode ch) const;
inline le_bool isVirama(LEUnicode ch) const;
+ inline le_bool isAlLakuna(LEUnicode ch) const;
inline le_bool isNukta(LEUnicode ch) const;
inline le_bool isVattu(LEUnicode ch) const;
inline le_bool isMatra(LEUnicode ch) const;
@@ -112,6 +114,7 @@
inline static le_bool isConsonant(CharClass charClass);
inline static le_bool isReph(CharClass charClass);
inline static le_bool isVirama(CharClass charClass);
+ inline static le_bool isAlLakuna(CharClass charClass);
inline static le_bool isNukta(CharClass charClass);
inline static le_bool isVattu(CharClass charClass);
inline static le_bool isMatra(CharClass charClass);
@@ -193,6 +196,11 @@
return (charClass & CF_CLASS_MASK) == CC_VIRAMA;
}
+inline le_bool IndicClassTable::isAlLakuna(CharClass charClass)
+{
+ return (charClass & CF_CLASS_MASK) == CC_AL_LAKUNA;
+}
+
inline le_bool IndicClassTable::isVattu(CharClass charClass)
{
return (charClass & CF_VATTU) != 0;
@@ -255,6 +263,11 @@
return isVirama(getCharClass(ch));
}
+inline le_bool IndicClassTable::isAlLakuna(LEUnicode ch) const
+{
+ return isAlLakuna(getCharClass(ch));
+}
+
inline le_bool IndicClassTable::isNukta(LEUnicode ch) const
{
return isNukta(getCharClass(ch));
Modified: trunk/source/layout/OpenTypeLayoutEngine.cpp
===================================================================
--- trunk/source/layout/OpenTypeLayoutEngine.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/layout/OpenTypeLayoutEngine.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -177,16 +177,8 @@
return 0;
}
- if (LE_FAILURE(success)) {
- LE_DELETE_ARRAY(outChars);
- return 0;
- }
-
CanonShaping::reorderMarks(&chars[offset], count, rightToLeft, outChars, glyphStorage);
}
- if (LE_FAILURE(success)) {
- return 0;
- }
glyphStorage.allocateGlyphArray(count, rightToLeft, success);
glyphStorage.allocateAuxData(success);
@@ -355,12 +347,6 @@
}
LEGlyphID zwnj = fFontInstance->mapCharToGlyph(0x200C);
-#if 0
- // The nbsp translation was only here to make one
- // broken font work. Not a good idea in general...
- LEGlyphID nbsp = fFontInstance->mapCharToGlyph(0x00A0);
- LEGlyphID space = fFontInstance->mapCharToGlyph(0x0020);
-#endif
if (zwnj != 0x0000) {
for (le_int32 g = 0; g < glyphCount; g += 1) {
@@ -368,10 +354,6 @@
if (glyph == zwnj) {
glyphStorage[g] = LE_SET_GLYPH(glyph, 0xFFFF);
- #if 0
- } else if (glyph == nbsp) {
- glyphStorage[g] = LE_SET_GLYPH(glyph, space);
- #endif
}
}
}
Modified: trunk/source/layout/ScriptAndLanguage.cpp
===================================================================
--- trunk/source/layout/ScriptAndLanguage.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/layout/ScriptAndLanguage.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -47,7 +47,7 @@
* to be unsorted.
*/
le_uint16 count = SWAPW(scriptCount);
- le_uint16 limit = ((SWAPW(scriptRecordArray[0].offset) - sizeof(ScriptListTable)) / sizeof sizeof(scriptRecordArray)) + ANY_NUMBER;
+ le_uint16 limit = ((SWAPW(scriptRecordArray[0].offset) - sizeof(ScriptListTable)) / sizeof(scriptRecordArray)) + ANY_NUMBER;
Offset scriptTableOffset = 0;
if (count > limit) {
Modified: trunk/source/layoutex/layout/RunArrays.h
===================================================================
--- trunk/source/layoutex/layout/RunArrays.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/layoutex/layout/RunArrays.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
/*
**********************************************************************
- * Copyright (C) 2003-2007, International Business Machines
+ * Copyright (C) 2003-2008, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
@@ -502,6 +502,9 @@
virtual void init(le_int32 capacity);
virtual void grow(le_int32 capacity);
+ /**
+ * @internal
+ */
const Locale **fLocales;
private:
Modified: trunk/source/layoutex/layout/plruns.h
===================================================================
--- trunk/source/layoutex/layout/plruns.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/layoutex/layout/plruns.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
/*
*
- * (C) Copyright IBM Corp. 1998-2007 - All Rights Reserved
+ * (C) Copyright IBM Corp. 1998-2008 - All Rights Reserved
*
*/
@@ -13,8 +13,17 @@
#include "layout/loengine.h"
+/**
+ * Opaque datatype representing an array of font runs
+ */
typedef void pl_fontRuns;
+/**
+ * Opaque datatype representing an array of value runs
+ */
typedef void pl_valueRuns;
+/**
+ * Opaque datatype representing an array of locale runs
+ */
typedef void pl_localeRuns;
/**
Modified: trunk/source/test/cintltst/callcoll.c
===================================================================
--- trunk/source/test/cintltst/callcoll.c 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/cintltst/callcoll.c 2008-05-31 14:31:31 UTC (rev 153)
@@ -54,6 +54,9 @@
#include "cmemory.h"
#include "ucol_imp.h"
+/* set to 1 to test offsets in backAndForth() */
+#define TEST_OFFSETS 0
+
/* perform test with strength PRIMARY */
static void TestPrimary(void);
@@ -436,14 +439,15 @@
* Return an integer array containing all of the collation orders
* returned by calls to next on the specified iterator
*/
-int32_t* getOrders(UCollationElements *iter, int32_t *orderLength)
+OrderAndOffset* getOrders(UCollationElements *iter, int32_t *orderLength)
{
UErrorCode status;
int32_t order;
int32_t maxSize = 100;
int32_t size = 0;
- int32_t *temp;
- int32_t *orders =(int32_t*)malloc(sizeof(int32_t) * maxSize);
+ int32_t offset = ucol_getOffset(iter);
+ OrderAndOffset *temp;
+ OrderAndOffset *orders =(OrderAndOffset *)malloc(sizeof(OrderAndOffset) * maxSize);
status= U_ZERO_ERROR;
@@ -452,22 +456,26 @@
if (size == maxSize)
{
maxSize *= 2;
- temp = (int32_t*)malloc(sizeof(int32_t) * maxSize);
+ temp = (OrderAndOffset *)malloc(sizeof(OrderAndOffset) * maxSize);
- memcpy(temp, orders, size * sizeof(int32_t));
+ memcpy(temp, orders, size * sizeof(OrderAndOffset));
free(orders);
orders = temp;
}
- orders[size++] = order;
+ orders[size].order = order;
+ orders[size].offset = offset;
+
+ offset = ucol_getOffset(iter);
+ size += 1;
}
if (maxSize > size && size > 0)
{
- temp = (int32_t*)malloc(sizeof(int32_t) * size);
+ temp = (OrderAndOffset *)malloc(sizeof(OrderAndOffset) * size);
- memcpy(temp, orders, size * sizeof(int32_t));
+ memcpy(temp, orders, size * sizeof(OrderAndOffset));
free(orders);
orders = temp;
@@ -486,8 +494,7 @@
int32_t index, o;
UErrorCode status = U_ZERO_ERROR;
int32_t orderLength = 0;
- int32_t *orders;
- orders= getOrders(iter, &orderLength);
+ OrderAndOffset *orders = getOrders(iter, &orderLength);
/* Now go through it backwards and make sure we get the same values */
@@ -495,49 +502,60 @@
ucol_reset(iter);
/* synwee : changed */
- while ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER)
- {
- if (o != orders[-- index])
- {
+ while ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER) {
+ int32_t offset = ucol_getOffset(iter);
+
+ index -= 1;
+ if (o != orders[index].order) {
if (o == 0)
index ++;
- else
- {
- while (index > 0 && orders[-- index] == 0)
- {
+ else {
+ while (index > 0 && orders[-- index].order == 0) {
+ /* nothing... */
}
- if (o != orders[index])
- {
- log_err("Mismatch at index : 0x%x\n", index);
- return;
- }
+ if (o != orders[index].order) {
+ /* "%08X" prints zero-padded 8-digit hex; "%0:8X" is not a valid printf conversion. */
+ log_err("Mismatched order at index %d: 0x%08X vs. 0x%08X\n", index, orders[index].order, o);
+ goto bail;
+ }
}
}
+
+#if TEST_OFFSETS
+ if (offset != orders[index].offset) {
+ log_err("Mismatched offset at index %d: %d vs. %d\n", index,
+ orders[index].offset, offset);
+ goto bail;
+ }
+#endif
+
}
- while (index != 0 && orders[index - 1] == 0) {
- index --;
+ while (index != 0 && orders[index - 1].order == 0) {
+ index -= 1;
}
- if (index != 0)
- {
+ if (index != 0) {
log_err("Didn't get back to beginning - index is %d\n", index);
ucol_reset(iter);
log_err("\nnext: ");
- if ((o = ucol_next(iter, &status)) != UCOL_NULLORDER)
- {
+
+ if ((o = ucol_next(iter, &status)) != UCOL_NULLORDER) {
log_err("Error at %x\n", o);
}
+
log_err("\nprev: ");
- if ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER)
- {
+
+ if ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER) {
log_err("Error at %x\n", o);
}
+
log_verbose("\n");
}
+bail:
free(orders);
}
Modified: trunk/source/test/cintltst/callcoll.h
===================================================================
--- trunk/source/test/cintltst/callcoll.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/cintltst/callcoll.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2006, International Business Machines Corporation and
+ * Copyright (c) 1997-2008, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/********************************************************************************
@@ -29,13 +29,20 @@
#define RULE_BUFFER_LEN 8192
+struct OrderAndOffset
+{
+ int32_t order;
+ int32_t offset;
+};
+typedef struct OrderAndOffset OrderAndOffset;
+
/* tests comparison of custom collation with different strengths */
void doTest(UCollator*, const UChar* source, const UChar* target, UCollationResult result);
/* verify that iterating forward and backwards over the string yields same CEs */
void backAndForth(UCollationElements *iter);
/* gets an array of CEs for a string in UCollationElements iterator. */
-int32_t* getOrders(UCollationElements *iter, int32_t *orderLength);
+OrderAndOffset* getOrders(UCollationElements *iter, int32_t *orderLength);
void genericOrderingTestWithResult(UCollator *coll, const char * const s[], uint32_t size, UCollationResult result);
void genericOrderingTest(UCollator *coll, const char * const s[], uint32_t size);
Modified: trunk/source/test/cintltst/ccaltst.c
===================================================================
--- trunk/source/test/cintltst/ccaltst.c 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/cintltst/ccaltst.c 2008-05-31 14:31:31 UTC (rev 153)
@@ -58,7 +58,7 @@
static void TestCalendar()
{
- UCalendar *caldef = 0, *caldef2 = 0, *calfr = 0, *calit = 0;
+ UCalendar *caldef = 0, *caldef2 = 0, *calfr = 0, *calit = 0, *calfrclone = 0;
UEnumeration* uenum = NULL;
int32_t count, count2, i,j;
UChar tzID[4];
@@ -234,8 +234,13 @@
if(U_FAILURE(status)) {
log_err("FAIL: error in ucal_open calit : %s\n", u_errorName(status));
}
+
+ /*Testing the clone() function*/
+ calfrclone = ucal_clone(calfr, &status);
+ if(U_FAILURE(status)){
+ log_err("FAIL: error in ucal_clone calfr : %s\n", u_errorName(status));
+ }
-
/*Testing udat_getAvailable() and udat_countAvailable()*/
log_verbose("\nTesting getAvailableLocales and countAvailable()\n");
count=ucal_countAvailable();
@@ -256,7 +261,7 @@
log_verbose("\nTesting ucal_equivalentTo()\n");
if(caldef && caldef2 && calfr && calit) {
if(ucal_equivalentTo(caldef, caldef2) == FALSE || ucal_equivalentTo(caldef, calfr)== TRUE ||
- ucal_equivalentTo(caldef, calit)== TRUE) {
+ ucal_equivalentTo(caldef, calit)== TRUE || ucal_equivalentTo(calfr, calfrclone) == FALSE) {
log_err("FAIL: Error. equivalentTo test failed\n");
} else {
log_verbose("PASS: equivalentTo test passed\n");
@@ -390,6 +395,7 @@
ucal_close(caldef2);
ucal_close(calfr);
ucal_close(calit);
+ ucal_close(calfrclone);
/*closing the UDateFormat used */
udat_close(datdef);
free(result);
Modified: trunk/source/test/cintltst/citertst.c
===================================================================
--- trunk/source/test/cintltst/citertst.c 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/cintltst/citertst.c 2008-05-31 14:31:31 UTC (rev 153)
@@ -562,7 +562,7 @@
UCollator *en_us=NULL;
UCollationElements *iter, *pristine;
int32_t offset;
- int32_t *orders;
+ OrderAndOffset *orders;
int32_t orderLength=0;
int count = 0;
UChar test1[50];
@@ -649,7 +649,7 @@
switch (count) {
case 0:
if (ucol_getOffset(iter) != 1) {
- log_err("ERROR: Offset of iteration should be 0\n");
+ log_err("ERROR: Offset of iteration should be 1\n");
}
break;
case 3:
@@ -671,10 +671,16 @@
U_SUCCESS(status)) {
switch (count) {
case 0:
+ case 1:
if (ucol_getOffset(iter) != 3) {
log_err("ERROR: Offset of iteration should be 3\n");
}
break;
+ case 2:
+ if (ucol_getOffset(iter) != 1) {
+ log_err("ERROR: Offset of iteration should be 1\n");
+ }
+ break;
default:
if (ucol_getOffset(iter) != 0) {
log_err("ERROR: Offset of iteration should be 0\n");
@@ -937,7 +943,7 @@
UCollationElements *testiter,
*iter;
int32_t count = 0;
- int32_t *testorders,
+ OrderAndOffset *testorders,
*orders;
UChar teststr[500];
@@ -977,8 +983,8 @@
while (count != 0) {
/* UCA collation element for 0x0F76 */
- if ((count > 250 && testorders[-- count] != orders[1]) ||
- (count <= 250 && testorders[-- count] != orders[0])) {
+ if ((count > 250 && testorders[-- count].order != orders[1].order) ||
+ (count <= 250 && testorders[-- count].order != orders[0].order)) {
log_err("Error decomposition does not give the right collation element at %d count\n", count);
break;
}
Modified: trunk/source/test/cintltst/usrchdat.c
===================================================================
--- trunk/source/test/cintltst/usrchdat.c 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/cintltst/usrchdat.c 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,5 +1,5 @@
/********************************************************************
- * Copyright (c) 2001-2007 International Business Machines
+ * Copyright (c) 2001-2008 International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************
* File USRCHDAT.H
@@ -19,6 +19,9 @@
#if !UCONFIG_NO_COLLATION
+/* Set to 1 if matches must be on grapheme boundaries */
+#define GRAPHEME_BOUNDARIES 1
+
U_CDECL_BEGIN
struct SearchData {
const char *text;
@@ -51,9 +54,15 @@
{"Scott Ganyo", " ", NULL, UCOL_TERTIARY, NULL, {5, -1}, {1}},
{"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
{"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
- {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1},
- {2}},
+
+#if GRAPHEME_BOUNDARIES
+ {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+#else
+ {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}},
{"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
+#endif
+
{"\\u00c9", "e", NULL, UCOL_PRIMARY, NULL, {0, -1}, {1}},
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
};
@@ -74,6 +83,10 @@
{"testing that string ab\\u00e9cd does not match e", "e", NULL,
UCOL_TERTIARY, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}},
{"\\u00c9", "e", "fr", UCOL_PRIMARY, "characterbreaker", {0, -1}, {1}},
+#if 0
+ /* Problem reported by Dave Bertoni, same as ticket 4279? */
+ {"\\u0043\\u004F\\u0302\\u0054\\u00C9", "\\u004F", NULL, UCOL_TERTIARY, "characterbreaker", {1, -1}, {2}},
+#endif
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
};
@@ -92,6 +105,12 @@
{7, 7, 7, 7}},
{"\\u00c0 should match but not A", "A\\u0300", "en", UCOL_IDENTICAL,
NULL, {0, -1}, {1, 0}},
+
+#if 0
+ /* Ticket 5382 */
+ {"12\\u0171", "\\u0170", NULL, UCOL_SECONDARY, NULL, {2, -1}, {2}},
+#endif
+
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
};
@@ -120,14 +139,19 @@
};
static const SearchData NORMEXACT[] = {
- {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1},
- {2}},
+ {"a\\u0300\\u0325", "a\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {3}},
+
+#if GRAPHEME_BOUNDARIES
+ {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+#else
+ {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}},
+#endif
+
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
};
static const SearchData NONNORMEXACT[] = {
- {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {-1},
- {0}},
+ {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
};
@@ -167,6 +191,15 @@
};
static const SearchData COMPOSITEBOUNDARIES[] = {
+#if GRAPHEME_BOUNDARIES
+ {"\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
+ {"\\u00C0A", "A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
+ {"B\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"\\u00C0B", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
+#else
{"\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
{"A\\u00C0C", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}},
{"\\u00C0A", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}},
@@ -175,16 +208,25 @@
{"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
{"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, 1, -1},
{1, 1}},
+#endif
+
{"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
/* A + 030A + 0301 */
{"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
+ {"\\u01FA", "A\\u030A\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
{"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
{"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
{"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
{"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
{"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
{"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+
+#if GRAPHEME_BOUNDARIES
+ {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+#else
{"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
+#endif
+
{"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
{"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
{"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
@@ -193,6 +235,15 @@
{"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
{"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
{"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+
+ /* Ticket 5024 */
+ {"a\\u00e1", "a\\u00e1", NULL, UCOL_SECONDARY, NULL, {0, -1}, {2}},
+
+ /* Ticket 5420 */
+ {"fu\\u00dfball", "fu\\u00df", NULL, UCOL_TERTIARY, NULL, {0, -1}, {3}},
+ {"fu\\u00dfball", "fuss", NULL, UCOL_PRIMARY, NULL, {0, -1}, {3}},
+ {"fu\\u00dfball", "uss", NULL, UCOL_PRIMARY, NULL, {1, -1}, {2}},
+
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
};
@@ -229,12 +280,24 @@
static const SearchData CONTRACTION[] = {
/* common discontiguous */
{"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
- {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {1, -1},
- {2}},
+
+#if GRAPHEME_BOUNDARIES
+ {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+#else
+ {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}},
+#endif
+
/* contraction prefix */
{"AB\\u0315C", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+
+#if GRAPHEME_BOUNDARIES
+ {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+#else
{"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
{"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, NULL, {2, -1}, {1}},
+#endif
+
/* discontiguous problem here for backwards iteration.
accents not found because discontiguous stores all information */
{"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, NULL, {-1},
@@ -249,15 +312,37 @@
/* blocked discontiguous */
{"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, NULL,
{-1}, {0}},
+
+#if GRAPHEME_BOUNDARIES
+ /*
+ * "ab" generates a contraction that's an expansion. The "z" matches the
+ * first CE of the expansion but the match fails because it ends in the
+ * middle of an expansion...
+ */
+ {"ab", "z", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+#else
{"ab", "z", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
+#endif
+
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
};
static const char *IGNORABLERULE = "&a = \\u0300";
static const SearchData IGNORABLE[] = {
+#if GRAPHEME_BOUNDARIES
+ /*
+ * This isn't much of a test when matches have to be on
+ * grapheme boundaries. The match at 0 only works because
+ * it's at the start of the text.
+ */
{"\\u0300\\u0315 \\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, NULL,
+ {0, -1}, {2}},
+#else
+ {"\\u0300\\u0315 \\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, NULL,
{0, 3, -1}, {2, 2}},
+#endif
+
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
};
@@ -273,6 +358,20 @@
{6, 6}},
{"Scott Ganyo", "c", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
{"Scott Ganyo", " ", NULL, UCOL_TERTIARY, NULL, {5, -1}, {1}},
+
+#if GRAPHEME_BOUNDARIES
+ {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL, UCOL_TERTIARY,
+ NULL, {-1}, {0}},
+ {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL, UCOL_TERTIARY,
+ NULL, {-1}, {0}},
+ {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325",
+ "\\u0300b\\u0325", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+#else
{"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
{"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}},
{"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1},
@@ -285,12 +384,28 @@
NULL, {0, -1}, {5}},
{"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325",
"\\u0300b\\u0325", NULL, UCOL_TERTIARY, NULL, {1, 12, -1}, {5, 3}},
+#endif
+
{"\\u00c4\\u0323", "A\\u0323\\u0308", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
{"\\u0308\\u0323", "\\u0323\\u0308", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
};
+
static const SearchData NORMCANONICAL[] = {
+#if GRAPHEME_BOUNDARIES
+ /*
+ * These tests don't really mean anything. With matches restricted to grapheme
+ * boundaries, isCanonicalMatch doesn't mean anything unless normalization is
+ * also turned on...
+ */
+ {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+#else
{"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
{"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
{"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1},
@@ -299,6 +414,8 @@
{2}},
{"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}},
{"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}},
+#endif
+
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
};
@@ -397,6 +514,20 @@
};
static const SearchData COMPOSITEBOUNDARIESCANONICAL[] = {
+#if GRAPHEME_BOUNDARIES
+ {"\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
+ {"\\u00C0A", "A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
+ {"B\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"\\u00C0B", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+
+ /* first one matches only because it's at the start of the text */
+ {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
+
+ /* \\u0300 blocked by \\u0300 */
+ {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+#else
{"\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
{"A\\u00C0C", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}},
{"\\u00C0A", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}},
@@ -407,26 +538,66 @@
{1, 1}},
/* \\u0300 blocked by \\u0300 */
{"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
+#endif
+
/* A + 030A + 0301 */
{"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
+ {"\\u01FA", "A\\u030A\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
+
+#if GRAPHEME_BOUNDARIES
+ {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+#else
{"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
{"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
+#endif
+
{"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+
+#if GRAPHEME_BOUNDARIES
+ {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+#else
{"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
+#endif
+
/* blocked accent */
{"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
{"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+
+#if GRAPHEME_BOUNDARIES
+ {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+#else
{"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
{"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
{"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
+#endif
+
{"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
+
+#if GRAPHEME_BOUNDARIES
+ {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+#else
{"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
{"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
+#endif
+
{"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
+
+#if GRAPHEME_BOUNDARIES
+ {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A",
+ NULL, UCOL_TERTIARY, NULL, {10, -1}, {2}},
+#else
{"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
{"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
{"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A",
NULL, UCOL_TERTIARY, NULL, {0, 6, 10, 13, -1}, {1, 3, 2, 1}},
+#endif
+
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
};
@@ -459,33 +630,59 @@
static const SearchData CONTRACTIONCANONICAL[] = {
/* common discontiguous */
+#if GRAPHEME_BOUNDARIES
+ {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+#else
{"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}},
- {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {1, -1},
- {2}},
+ {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}},
+#endif
+
/* contraction prefix */
{"AB\\u0315C", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+
+#if GRAPHEME_BOUNDARIES
+ {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+#else
{"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
{"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, NULL, {2, -1}, {1}},
+#endif
+
/* discontiguous problem here for backwards iteration.
forwards gives 0, 4 but backwards give 1, 3 */
/* {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, NULL, {0, -1},
{4}}, */
/* ends not with a contraction character */
- {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {-1},
- {0}},
- {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL,
- {0, -1}, {3}},
- {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, NULL,
- {0, -1}, {4}},
+ {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+ {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {0, -1}, {3}},
+
+#if GRAPHEME_BOUNDARIES
+ {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+
/* blocked discontiguous */
- {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, NULL,
- {1, -1}, {4}},
+ {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
+
+ /*
+ * "ab" generates a contraction that's an expansion. The "z" matches the
+ * first CE of the expansion but the match fails because it ends in the
+ * middle of an expansion...
+ */
+ {"ab", "z", NULL, UCOL_TERTIARY, NULL, {-1}, {2}},
+#else
+ {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {4}},
+
+ /* blocked discontiguous */
+ {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, NULL, {1, -1}, {4}},
+
{"ab", "z", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
+#endif
+
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
};
-static const SearchData DIACTRICMATCH[] = {
+static const SearchData DIACRITICMATCH[] = {
{"\\u03BA\\u03B1\\u03B9\\u0300\\u0020\\u03BA\\u03B1\\u1F76", "\\u03BA\\u03B1\\u03B9", NULL, UCOL_PRIMARY, NULL, {0, 5,-1}, {4, 3}},
{"\\u0061\\u0061\\u00E1", "\\u0061\\u00E1", NULL, UCOL_SECONDARY, NULL, {1, -1}, {2}},
{"\\u0020\\u00C2\\u0303\\u0020\\u0041\\u0061\\u1EAA\\u0041\\u0302\\u0303\\u00C2\\u0303\\u1EAB\\u0061\\u0302\\u0303\\u00E2\\u0303\\uD806\\uDC01\\u0300\\u0020",
Modified: trunk/source/test/cintltst/usrchtst.c
===================================================================
--- trunk/source/test/cintltst/usrchtst.c 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/cintltst/usrchtst.c 2008-05-31 14:31:31 UTC (rev 153)
@@ -453,28 +453,35 @@
UCollator *collator = getCollator(search.collator);
UBreakIterator *breaker = getBreakIterator(search.breaker);
UStringSearch *strsrch;
+ UBool result = TRUE;
CHECK_BREAK_BOOL(search.breaker);
u_unescape(search.text, text, 128);
u_unescape(search.pattern, pattern, 32);
ucol_setStrength(collator, search.strength);
+ ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
strsrch = usearch_openFromCollator(pattern, -1, text, -1, collator,
breaker, &status);
usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
&status);
if (U_FAILURE(status)) {
log_err("Error opening string search %s\n", u_errorName(status));
- return FALSE;
+ result = FALSE;
+ goto bail;
}
if (!assertEqualWithUStringSearch(strsrch, search)) {
ucol_setStrength(collator, UCOL_TERTIARY);
usearch_close(strsrch);
- return FALSE;
+ result = FALSE;
+ goto bail;
}
+
+bail:
+ ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
ucol_setStrength(collator, UCOL_TERTIARY);
usearch_close(strsrch);
- return TRUE;
+ return result;
}
static UBool assertEqualWithAttribute(const SearchData search,
@@ -1537,7 +1544,7 @@
ucol_close(collator);
}
-static void TestDiactricMatch(void)
+static void TestDiacriticMatch(void)
{
UChar pattern[128];
UChar text[128];
@@ -1556,7 +1563,7 @@
return;
}
- search = DIACTRICMATCH[count];
+ search = DIACRITICMATCH[count];
while (search.text != NULL) {
if (search.collator != NULL) {
coll = ucol_openFromShortString(search.collator, FALSE, NULL, &status);
@@ -1584,7 +1591,7 @@
}
ucol_close(coll);
- search = DIACTRICMATCH[++count];
+ search = DIACRITICMATCH[++count];
}
usearch_close(strsrch);
}
@@ -2024,6 +2031,7 @@
UChar text[128];
UErrorCode status = U_ZERO_ERROR;
UStringSearch *strsrch;
+ UCollator *collator;
memset(pattern, 0, 32*sizeof(UChar));
memset(text, 0, 128*sizeof(UChar));
@@ -2031,8 +2039,13 @@
open();
strsrch = usearch_openFromCollator(pattern, 16, text, 32, EN_US_, NULL,
&status);
+
+ collator = usearch_getCollator(strsrch);
+ ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
+
usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
&status);
+
/* testing out of bounds error */
usearch_setOffset(strsrch, -1, &status);
if (U_SUCCESS(status)) {
@@ -2071,7 +2084,7 @@
log_err("Error match found at %d %d\n",
usearch_getMatchedStart(strsrch),
usearch_getMatchedLength(strsrch));
- return;
+ goto bail;
}
matchindex = search.offset[count + 1] == -1 ? -1 :
search.offset[count + 2];
@@ -2080,7 +2093,7 @@
&status);
if (usearch_getOffset(strsrch) != search.offset[count + 1] + 1) {
log_err("Error setting offset\n");
- return;
+ goto bail;
}
}
@@ -2095,9 +2108,12 @@
log_err("Error match found at %d %d\n",
usearch_getMatchedStart(strsrch),
usearch_getMatchedLength(strsrch));
- return;
+ goto bail;
}
}
+
+bail:
+ ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
usearch_close(strsrch);
close();
}
@@ -2242,7 +2258,7 @@
"tscoll/usrchtst/TestContractionCanonical");
addTest(root, &TestEnd, "tscoll/usrchtst/TestEnd");
addTest(root, &TestNumeric, "tscoll/usrchtst/TestNumeric");
- addTest(root, &TestDiactricMatch, "tscoll/usrchtst/TestDiactricMatch");
+ addTest(root, &TestDiacriticMatch, "tscoll/usrchtst/TestDiacriticMatch");
}
#endif /* #if !UCONFIG_NO_COLLATION */
Modified: trunk/source/test/intltest/Makefile.in
===================================================================
--- trunk/source/test/intltest/Makefile.in 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/intltest/Makefile.in 2008-05-31 14:31:31 UTC (rev 153)
@@ -56,9 +56,8 @@
itrbnf.o itrbnfrt.o itrbnfp.o ucaconf.o icusvtst.o \
uobjtest.o idnaref.o idnaconf.o nptrans.o punyref.o testidn.o testidna.o incaltst.o \
calcasts.o v32test.o uvectest.o textfile.o tokiter.o utxttest.o \
-windttst.o winnmtst.o winutil.o csdetest.o tzrulets.o tzoffloc.o tzfmttst.o
+windttst.o winnmtst.o winutil.o csdetest.o tzrulets.o tzoffloc.o tzfmttst.o ssearch.o dtifmtts.o
-
DEPS = $(OBJECTS:.o=.d)
-include Makefile.local
Added: trunk/source/test/intltest/dtifmtts.cpp
===================================================================
--- trunk/source/test/intltest/dtifmtts.cpp (rev 0)
+++ trunk/source/test/intltest/dtifmtts.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -0,0 +1,397 @@
+
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 1997-2008, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+
+//FIXME: how to define it at compile time
+#define DTIFMTTS_DEBUG 1
+
+
+#ifdef DTIFMTTS_DEBUG
+#include <iostream>
+#endif
+
+#include "cstring.h"
+#include "dtifmtts.h"
+#include "unicode/dtintrv.h"
+#include "unicode/dtitvinf.h"
+#include "unicode/dtitvfmt.h"
+
+
+#ifdef DTIFMTTS_DEBUG
+//#define PRINTMESG(msg) { std::cout << "(" << __FILE__ << ":" << __LINE__ << ") " << msg << "\n"; }
+#define PRINTMESG(msg) { std::cout << msg; }
+#endif
+
+#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
+
+
+// This is an API test, not a unit test. It doesn't test very many cases, and doesn't
+// try to test the full functionality. It just calls each function in the class and
+// verifies that it works on a basic level.
+
+void DateIntervalFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) {
+ if (exec) logln("TestSuite DateIntervalFormat");
+ switch (index) {
+ // TODO: uncomment. comment out temporarily
+ //TESTCASE(0, testAPI);
+ //TESTCASE(1, testFormat);
+ default: name = ""; break;
+ }
+}
+
+/**
+ * Test various generic API methods of DateIntervalFormat for API coverage.
+ */
+void DateIntervalFormatTest::testAPI() {
+
+ /* ====== Test create interval instance with default locale and skeleton
+ */
+ UErrorCode status = U_ZERO_ERROR;
+ logln("Testing DateIntervalFormat create instance with default locale and skeleton");
+
+ DateIntervalFormat* dtitvfmt = DateIntervalFormat::createInstance(UDAT_YEAR_MONTH_DAY, status);
+ if(U_FAILURE(status)) {
+ dataerrln("ERROR: Could not create DateIntervalFormat (skeleton + default locale) - exitting");
+ return;
+ } else {
+ delete dtitvfmt;
+ }
+
+
+ /* ====== Test create interval instance with given locale and skeleton
+ */
+ status = U_ZERO_ERROR;
+ logln("Testing DateIntervalFormat create instance with given locale and skeleton");
+
+ dtitvfmt = DateIntervalFormat::createInstance(UDAT_YEAR_MONTH_DAY, Locale::getJapanese(), status);
+ if(U_FAILURE(status)) {
+ dataerrln("ERROR: Could not create DateIntervalFormat (skeleton + locale) - exitting");
+ return;
+ } else {
+ delete dtitvfmt;
+ }
+
+
+ /* ====== Test create interval instance with dateIntervalInfo and skeleton
+ */
+ status = U_ZERO_ERROR;
+ logln("Testing DateIntervalFormat create instance with dateIntervalInfo and skeleton");
+
+ DateIntervalInfo* dtitvinf = new DateIntervalInfo(Locale::getSimplifiedChinese(), status);
+
+ dtitvfmt = DateIntervalFormat::createInstance("EEEdMMMyhms", dtitvinf, status);
+ if(U_FAILURE(status)) {
+ dataerrln("ERROR: Could not create DateIntervalFormat (skeleton + DateIntervalInfo + default locale) - exitting");
+ return;
+ } else {
+ delete dtitvfmt;
+ }
+
+
+ /* ====== Test create interval instance with locale, dateIntervalInfo and skeleton
+ */
+ status = U_ZERO_ERROR;
+ logln("Testing DateIntervalFormat create instance with dateIntervalInfo and skeleton");
+
+ dtitvinf = new DateIntervalInfo(Locale::getSimplifiedChinese(), status);
+
+ dtitvfmt = DateIntervalFormat::createInstance("EEEdMMMyhms", Locale::getSimplifiedChinese(), dtitvinf, status);
+ if(U_FAILURE(status)) {
+ dataerrln("ERROR: Could not create DateIntervalFormat (skeleton + DateIntervalInfo + locale) - exitting");
+ return;
+ }
+ // not deleted, test clone
+
+
+ // ====== Test clone()
+ status = U_ZERO_ERROR;
+ logln("Testing DateIntervalFormat clone");
+
+ DateIntervalFormat* another = (DateIntervalFormat*)dtitvfmt->clone();
+ if ( (*another) != (*dtitvfmt) ) {
+ dataerrln("ERROR: clone failed");
+ }
+
+
+ // ====== Test getDateIntervalInfo, setDateIntervalInfo, adoptDateIntervalInfo
+ status = U_ZERO_ERROR;
+ logln("Testing DateIntervalFormat getDateIntervalInfo");
+ const DateIntervalInfo* inf = another->getDateIntervalInfo();
+ dtitvfmt->setDateIntervalInfo(*inf, status);
+ const DateIntervalInfo* anotherInf = dtitvfmt->getDateIntervalInfo();
+ if ( (*inf) != (*anotherInf) || U_FAILURE(status) ) {
+ dataerrln("ERROR: getDateIntervalInfo/setDateIntervalInfo failed");
+ }
+
+ status = U_ZERO_ERROR;
+ DateIntervalInfo* nonConstInf = inf->clone();
+ dtitvfmt->adoptDateIntervalInfo(nonConstInf, status);
+ anotherInf = dtitvfmt->getDateIntervalInfo();
+ if ( (*inf) != (*anotherInf) || U_FAILURE(status) ) {
+ dataerrln("ERROR: adoptDateIntervalInfo failed");
+ }
+
+ // ====== Test getDateFormat, setDateFormat, adoptDateFormat
+
+ status = U_ZERO_ERROR;
+ logln("Testing DateIntervalFormat getDateFormat");
+ const DateFormat* fmt = another->getDateFormat();
+ dtitvfmt->setDateFormat(*fmt, status);
+ const DateFormat* anotherFmt = dtitvfmt->getDateFormat();
+ if ( (*fmt) != (*anotherFmt) || U_FAILURE(status) ) {
+ dataerrln("ERROR: getDateFormat/setDateFormat failed");
+ }
+
+ status = U_ZERO_ERROR;
+ DateFormat* nonConstFmt = (DateFormat*)fmt->clone();
+ dtitvfmt->adoptDateFormat(nonConstFmt, status);
+ anotherFmt = dtitvfmt->getDateFormat();
+ if ( (*fmt) != (*anotherFmt) || U_FAILURE(status) ) {
+ dataerrln("ERROR: adoptDateFormat failed");
+ }
+
+
+ // ======= Test getStaticClassID()
+
+ logln("Testing getStaticClassID()");
+
+
+ if(dtitvfmt->getDynamicClassID() != DateIntervalFormat::getStaticClassID()) {
+ errln("ERROR: getDynamicClassID() didn't return the expected value");
+ }
+
+ delete another;
+
+ // ====== test constructor/copy constructor and assignment
+ /* they are protected, no test
+ logln("Testing DateIntervalFormat constructor and assignment operator");
+ status = U_ZERO_ERROR;
+
+ DateFormat* constFmt = (constFmt*)dtitvfmt->getDateFormat()->clone();
+ inf = dtitvfmt->getDateIntervalInfo()->clone();
+
+
+ DateIntervalFormat* dtifmt = new DateIntervalFormat(fmt, inf, status);
+ if(U_FAILURE(status)) {
+ dataerrln("ERROR: Could not create DateIntervalFormat (default) - exitting");
+ return;
+ }
+
+ DateIntervalFormat* dtifmt2 = new(dtifmt);
+ if ( (*dtifmt) != (*dtifmt2) ) {
+ dataerrln("ERROR: Could not create DateIntervalFormat (default) - exitting");
+ return;
+ }
+
+ DateIntervalFormat dtifmt3 = (*dtifmt);
+ if ( (*dtifmt) != dtifmt3 ) {
+ dataerrln("ERROR: Could not create DateIntervalFormat (default) - exitting");
+ return;
+ }
+
+ delete dtifmt2;
+ delete dtifmt3;
+ delete dtifmt;
+ */
+
+ delete dtitvfmt;
+
+
+ //====== test format in testFormat()
+
+}
+
+
+/**
+ * Test formatting of sample date intervals for each locale in testLocale.
+ */
+void DateIntervalFormatTest::testFormat() {
+
+ const char* DATA[] = {
+ "yyyy MM dd HH:mm:ss",
+ "2007 10 10 10:10:10", "2008 10 10 10:10:10",
+ "2007 10 10 10:10:10", "2007 11 10 10:10:10",
+ "2007 11 10 10:10:10", "2007 11 20 10:10:10",
+ "2007 01 10 10:00:10", "2007 01 10 14:10:10",
+ "2007 01 10 10:00:10", "2007 01 10 10:20:10",
+ "2007 01 10 10:10:10", "2007 01 10 10:10:20",
+ };
+
+ const char* testLocale[][3] = {
+ {"en", "", ""},
+ {"zh", "", ""},
+ {"de", "", ""},
+ {"ar", "", ""},
+ {"en", "GB", ""},
+ {"fr", "", ""},
+ {"it", "", ""},
+ {"nl", "", ""},
+ {"zh", "TW", ""},
+ {"ja", "", ""},
+ {"pt", "BR", ""},
+ {"ru", "", ""},
+ {"pl", "", ""},
+ {"tr", "", ""},
+ {"es", "", ""},
+ {"ko", "", ""},
+ {"th", "", ""},
+ {"sv", "", ""},
+ {"fi", "", ""},
+ {"da", "", ""},
+ {"pt", "PT", ""},
+ {"ro", "", ""},
+ {"hu", "", ""},
+ {"he", "", ""},
+ {"in", "", ""},
+ {"cs", "", ""},
+ {"el", "", ""},
+ {"no", "", ""},
+ {"vi", "", ""},
+ {"bg", "", ""},
+ {"hr", "", ""},
+ {"lt", "", ""},
+ {"sk", "", ""},
+ {"sl", "", ""},
+ {"sr", "", ""},
+ {"ca", "", ""},
+ {"lv", "", ""},
+ {"uk", "", ""},
+ {"hi", "", ""},
+ };
+
+
+ uint32_t localeIndex;
+ for ( localeIndex = 0; localeIndex < ARRAY_SIZE(testLocale); ++localeIndex ) {
+ char locName[32];
+ uprv_strcpy(locName, testLocale[localeIndex][0]);
+ uprv_strcat(locName, testLocale[localeIndex][1]);
+ expect(DATA, ARRAY_SIZE(DATA), Locale(testLocale[localeIndex][0], testLocale[localeIndex][1], testLocale[localeIndex][2]), locName);
+ }
+}
+
+
+void DateIntervalFormatTest::expect(const char** data, int32_t data_length,
+ const Locale& loc, const char* locName) {
+
+ /*
+ UnicodeString formatResults[] = {
+ };
+ */
+
+ UnicodeString skeleton[] = {
+ "EEEEdMMMMy",
+ "dMMMMy",
+ "dMMMM",
+ "MMMMy",
+ "EEEEdMMMM",
+ "EEEdMMMy",
+ "dMMMy",
+ "dMMM",
+ "MMMy",
+ "EEEdMMM",
+ "EEEdMy",
+ "dMy",
+ "dM",
+ "My",
+ "EEEdM",
+ "d",
+ "EEEd",
+ "y",
+ "M",
+ "MMM",
+ "MMMM",
+ "hm",
+ "hmv",
+ "hmz",
+ "h",
+ "hv",
+ "hz",
+ "EEddMMyyyy", // following could be normalized
+ "EddMMy",
+ "hhmm",
+ "hhmmzz",
+ "hms", // following could not be normalized
+ "dMMMMMy",
+ "EEEEEdM",
+ };
+
+ int32_t i = 0;
+ UErrorCode ec = U_ZERO_ERROR;
+ UnicodeString str, str2;
+ SimpleDateFormat ref(data[i++], loc, ec);
+ if (!assertSuccess("construct SimpleDateFormat", ec)) return;
+
+#ifdef DTIFMTTS_DEBUG
+ char result[1000];
+ char mesg[1000];
+ sprintf(mesg, "locale: %s\n", locName);
+ PRINTMESG(mesg);
+#endif
+
+ while (i<data_length) {
+
+ // 'f'
+ const char* datestr = data[i++];
+ const char* datestr_2 = data[i++];
+#ifdef DTIFMTTS_DEBUG
+ sprintf(mesg, "original date: %s - %s\n", datestr, datestr_2);
+ PRINTMESG(mesg)
+#endif
+ UDate date = ref.parse(ctou(datestr), ec);
+ if (!assertSuccess("parse", ec)) return;
+ UDate date_2 = ref.parse(ctou(datestr_2), ec);
+ if (!assertSuccess("parse", ec)) return;
+ DateInterval dtitv(date, date_2);
+
+ for ( uint32_t skeletonIndex = 0;
+ skeletonIndex < ARRAY_SIZE(skeleton);
+ ++skeletonIndex ) {
+ const UnicodeString& oneSkeleton = skeleton[skeletonIndex];
+ DateIntervalFormat* dtitvfmt = DateIntervalFormat::createInstance(oneSkeleton, loc, ec);
+ if (!assertSuccess("createInstance(skeleton)", ec)) return;
+ FieldPosition pos=0;
+ dtitvfmt->format(&dtitv, str.remove(), pos, ec);
+ if (!assertSuccess("format", ec)) return;
+#ifdef DTIFMTTS_DEBUG
+ oneSkeleton.extract(0, oneSkeleton.length(), result, "UTF-8");
+ sprintf(mesg, "interval by skeleton: %s\n", result);
+ PRINTMESG(mesg)
+ str.extract(0, str.length(), result, "UTF-8");
+ sprintf(mesg, "interval date: %s\n", result);
+ PRINTMESG(mesg)
+#endif
+ delete dtitvfmt;
+ }
+
+ // test user created DateIntervalInfo
+ ec = U_ZERO_ERROR;
+ DateIntervalInfo* dtitvinf = new DateIntervalInfo(ec);
+ dtitvinf->setFallbackIntervalPattern("{0} --- {1}");
+ dtitvinf->setIntervalPattern(UDAT_YEAR_ABBR_MONTH_DAY, UCAL_MONTH, "yyyy MMM d - MMM y",ec);
+ if (!assertSuccess("DateIntervalInfo::setIntervalPattern", ec)) return;
+ dtitvinf->setIntervalPattern(UDAT_YEAR_ABBR_MONTH_DAY, UCAL_HOUR_OF_DAY, "yyyy MMM d HH:mm - HH:mm", ec);
+ if (!assertSuccess("DateIntervalInfo::setIntervalPattern", ec)) return;
+ DateIntervalFormat* dtitvfmt = DateIntervalFormat::createInstance(UDAT_YEAR_ABBR_MONTH_DAY, loc, dtitvinf, ec);
+ if (!assertSuccess("createInstance(skeleton,dtitvinf)", ec)) return;
+ FieldPosition pos=0;
+ dtitvfmt->format(&dtitv, str.remove(), pos, ec);
+ if (!assertSuccess("format", ec)) return;
+#ifdef DTIFMTTS_DEBUG
+ PRINTMESG("interval format using user defined DateIntervalInfo\n");
+ str.extract(0, str.length(), result, "UTF-8");
+ sprintf(mesg, "interval date: %s\n", result);
+ PRINTMESG(mesg)
+#endif
+ delete dtitvfmt;
+ }
+}
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
Added: trunk/source/test/intltest/dtifmtts.h
===================================================================
--- trunk/source/test/intltest/dtifmtts.h (rev 0)
+++ trunk/source/test/intltest/dtifmtts.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -0,0 +1,41 @@
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 2008, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************/
+
+#ifndef _INTLTESTDATEINTERVALFORMAT
+#define _INTLTESTDATEINTERVALFORMAT
+
+#include "unicode/utypes.h"
+#include "unicode/locid.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "intltest.h"
+
+/**
+ * Test basic functionality of various API functions
+ **/
+class DateIntervalFormatTest: public IntlTest {
+ void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL );
+
+public:
+ /**
+ * Performs tests on many API functions, see detailed comments in source code
+ **/
+ void testAPI();
+
+ /**
+ * test formatting
+ */
+ void testFormat();
+
+private:
+ void expect(const char** data, int32_t data_length, const Locale& loc,
+ const char* locName);
+};
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif
Modified: trunk/source/test/intltest/intltest.vcproj
===================================================================
--- trunk/source/test/intltest/intltest.vcproj 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/intltest/intltest.vcproj 2008-05-31 14:31:31 UTC (rev 153)
@@ -366,6 +366,14 @@
>
</File>
<File
+ RelativePath=".\ssearch.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\ssearch.h"
+ >
+ </File>
+ <File
RelativePath="svccoll.cpp"
>
</File>
@@ -606,6 +614,14 @@
>
</File>
<File
+ RelativePath=".\dtifmtts.cpp"
+ >
+ </File>
+ <File
+ RelativePath=".\dtifmtts.h"
+ >
+ </File>
+ <File
RelativePath=".\dtptngts.cpp"
>
</File>
Modified: trunk/source/test/intltest/itercoll.cpp
===================================================================
--- trunk/source/test/intltest/itercoll.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/intltest/itercoll.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2003, International Business Machines Corporation and
+ * Copyright (c) 1997-2008, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@@ -188,7 +188,7 @@
// Run all the way through the iterator, then get the offset
int32_t orderLength = 0;
- int32_t *orders = getOrders(*iter, orderLength);
+ Order *orders = getOrders(*iter, orderLength);
int32_t offset = iter->getOffset();
Modified: trunk/source/test/intltest/itformat.cpp
===================================================================
--- trunk/source/test/intltest/itformat.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/intltest/itformat.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2007, International Business Machines
+ * Copyright (c) 1997-2008, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************/
@@ -50,6 +50,7 @@
#include "tzfmttst.h" // TimeZoneFormatTest
#include "plurults.h" // PluralRulesTest
#include "plurfmts.h" // PluralFormatTest
+#include "dtifmtts.h" // DateIntervalFormatTest
#define TESTCLASS(id, TestClass) \
case id: \
@@ -117,6 +118,7 @@
TESTCLASS(34,TimeZoneFormatTest);
TESTCLASS(35,PluralRulesTest);
TESTCLASS(36,PluralFormatTest);
+ TESTCLASS(37,DateIntervalFormatTest);
default: name = ""; break; //needed to end loop
}
Modified: trunk/source/test/intltest/loctest.cpp
===================================================================
--- trunk/source/test/intltest/loctest.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/intltest/loctest.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -2143,35 +2143,56 @@
UErrorCode status = U_ZERO_ERROR;
UDate date = uprv_getUTCtime();
UChar TMP[4];
+ int32_t index = 0;
+ int32_t resLen = 0;
UnicodeString tempStr, resultStr;
// Cycle through historical currencies
- date = (UDate)-630720000000.0;
- ucurr_forLocaleAndDate("eo_AM", date, TMP, 4, &status);
- tempStr.setTo(TMP);
- resultStr.setTo("AMD");
- if (resultStr != tempStr) {
- errln("FAIL: didn't return AMD for eo_AM");
+ date = (UDate)-630720000000.0; // pre 1961 - no currency defined
+ index = ucurr_countCurrencies("eo_AM", date, &status);
+ if (index != 0)
+ {
+ errln("FAIL: didn't return 0 for eo_AM");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AM", date, index, TMP, 4, &status);
+ if (resLen != 0) {
+ errln("FAIL: eo_AM didn't return NULL");
}
+ status = U_ZERO_ERROR;
- date = (UDate)0.0;
- ucurr_forLocaleAndDate("eo_AM", date, TMP, 4, &status);
+ date = (UDate)0.0; // 1970 - one currency defined
+ index = ucurr_countCurrencies("eo_AM", date, &status);
+ if (index != 1)
+ {
+ errln("FAIL: didn't return 1 for eo_AM");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AM", date, index, TMP, 4, &status);
tempStr.setTo(TMP);
resultStr.setTo("SUR");
if (resultStr != tempStr) {
errln("FAIL: didn't return SUR for eo_AM");
}
- date = (UDate)693792000000.0;
- ucurr_forLocaleAndDate("eo_AM", date, TMP, 4, &status);
+ date = (UDate)693792000000.0; // 1992 - one currency defined
+ index = ucurr_countCurrencies("eo_AM", date, &status);
+ if (index != 1)
+ {
+ errln("FAIL: didn't return 1 for eo_AM");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AM", date, index, TMP, 4, &status);
tempStr.setTo(TMP);
resultStr.setTo("RUR");
if (resultStr != tempStr) {
errln("FAIL: didn't return RUR for eo_AM");
}
- date = (UDate)977616000000.0;
- ucurr_forLocaleAndDate("eo_AM", date, TMP, 4, &status);
+ date = (UDate)977616000000.0; // post 1993 - one currency defined
+ index = ucurr_countCurrencies("eo_AM", date, &status);
+ if (index != 1)
+ {
+ errln("FAIL: didn't return 1 for eo_AM");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AM", date, index, TMP, 4, &status);
tempStr.setTo(TMP);
resultStr.setTo("AMD");
if (resultStr != tempStr) {
@@ -2179,85 +2200,233 @@
}
// Locale AD has multiple currencies at once
- date = (UDate)977616000000.0;
- ucurr_forLocaleAndDate("eo_AD", date, TMP, 4, &status);
+ date = (UDate)977616000000.0; // year 2001
+ index = ucurr_countCurrencies("eo_AD", date, &status);
+ if (index != 4)
+ {
+ errln("FAIL: didn't return 4 for eo_AD");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AD", date, 1, TMP, 4, &status);
tempStr.setTo(TMP);
resultStr.setTo("EUR");
if (resultStr != tempStr) {
errln("FAIL: didn't return EUR for eo_AD");
}
+ resLen = ucurr_forLocaleAndDate("eo_AD", date, 2, TMP, 4, &status);
+ tempStr.setTo(TMP);
+ resultStr.setTo("ESP");
+ if (resultStr != tempStr) {
+ errln("FAIL: didn't return ESP for eo_AD");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AD", date, 3, TMP, 4, &status);
+ tempStr.setTo(TMP);
+ resultStr.setTo("FRF");
+ if (resultStr != tempStr) {
+ errln("FAIL: didn't return FRF for eo_AD");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AD", date, 4, TMP, 4, &status);
+ tempStr.setTo(TMP);
+ resultStr.setTo("ADP");
+ if (resultStr != tempStr) {
+ errln("FAIL: didn't return ADP for eo_AD");
+ }
- date = (UDate)0.0;
- ucurr_forLocaleAndDate("eo_AD", date, TMP, 4, &status);
+ date = (UDate)0.0; // year 1970
+ index = ucurr_countCurrencies("eo_AD", date, &status);
+ if (index != 3)
+ {
+ errln("FAIL: didn't return 3 for eo_AD");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AD", date, 1, TMP, 4, &status);
tempStr.setTo(TMP);
resultStr.setTo("ESP");
if (resultStr != tempStr) {
errln("FAIL: didn't return ESP for eo_AD");
}
+ resLen = ucurr_forLocaleAndDate("eo_AD", date, 2, TMP, 4, &status);
+ tempStr.setTo(TMP);
+ resultStr.setTo("FRF");
+ if (resultStr != tempStr) {
+ errln("FAIL: didn't return FRF for eo_AD");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AD", date, 3, TMP, 4, &status);
+ tempStr.setTo(TMP);
+ resultStr.setTo("ADP");
+ if (resultStr != tempStr) {
+ errln("FAIL: didn't return ADP for eo_AD");
+ }
- // Locale UA has gap between years 1994 - 1996
- date = (UDate)788400000000.0;
- ucurr_forLocaleAndDate("eo_UA", date, TMP, 4, &status);
+ date = (UDate)-630720000000.0; // year 1950
+ index = ucurr_countCurrencies("eo_AD", date, &status);
+ if (index != 2)
+ {
+ errln("FAIL: didn't return 2 for eo_AD");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AD", date, 1, TMP, 4, &status);
tempStr.setTo(TMP);
- resultStr.setTo("UAH");
+ resultStr.setTo("ESP");
if (resultStr != tempStr) {
- errln("FAIL: didn't return UAH for eo_UA");
+ errln("FAIL: didn't return ESP for eo_AD");
}
+ resLen = ucurr_forLocaleAndDate("eo_AD", date, 2, TMP, 4, &status);
+ tempStr.setTo(TMP);
+ resultStr.setTo("ADP");
+ if (resultStr != tempStr) {
+ errln("FAIL: didn't return ADP for eo_AD");
+ }
+ date = (UDate)-2207520000000.0; // year 1900
+ index = ucurr_countCurrencies("eo_AD", date, &status);
+ if (index != 1)
+ {
+ errln("FAIL: didn't return 1 for eo_AD");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AD", date, 1, TMP, 4, &status);
+ tempStr.setTo(TMP);
+ resultStr.setTo("ESP");
+ if (resultStr != tempStr) {
+ errln("FAIL: didn't return ESP for eo_AD");
+ }
+
+ // Locale UA has gap between years 1994 - 1996
+ date = (UDate)788400000000.0;
+ index = ucurr_countCurrencies("eo_UA", date, &status);
+ if (index != 0)
+ {
+ errln("FAIL: didn't return 0 for eo_UA");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_UA", date, index, TMP, 4, &status);
+ if (resLen != 0) {
+ errln("FAIL: eo_UA didn't return NULL");
+ }
+ status = U_ZERO_ERROR;
+
+ // Test index bounds
+ resLen = ucurr_forLocaleAndDate("eo_UA", date, 100, TMP, 4, &status);
+ if (resLen != 0) {
+ errln("FAIL: eo_UA didn't return NULL");
+ }
+ status = U_ZERO_ERROR;
+
+ resLen = ucurr_forLocaleAndDate("eo_UA", date, 0, TMP, 4, &status);
+ if (resLen != 0) {
+ errln("FAIL: eo_UA didn't return NULL");
+ }
+ status = U_ZERO_ERROR;
+
+ // Test for bogus locale
+ index = ucurr_countCurrencies("eo_QQ", date, &status);
+ if (index != 0)
+ {
+ errln("FAIL: didn't return 0 for eo_QQ");
+ }
+ status = U_ZERO_ERROR;
+ resLen = ucurr_forLocaleAndDate("eo_QQ", date, 1, TMP, 4, &status);
+ if (resLen != 0) {
+ errln("FAIL: eo_QQ didn't return NULL");
+ }
+ status = U_ZERO_ERROR;
+ resLen = ucurr_forLocaleAndDate("eo_QQ", date, 0, TMP, 4, &status);
+ if (resLen != 0) {
+ errln("FAIL: eo_QQ didn't return NULL");
+ }
+ status = U_ZERO_ERROR;
+
// Cycle through histrocial currencies
- date = (UDate)977616000000.0;
- ucurr_forLocaleAndDate("eo_AO", date, TMP, 4, &status);
+ date = (UDate)977616000000.0; // 2001 - one currency
+ index = ucurr_countCurrencies("eo_AO", date, &status);
+ if (index != 1)
+ {
+ errln("FAIL: didn't return 1 for eo_AO");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AO", date, 1, TMP, 4, &status);
tempStr.setTo(TMP);
resultStr.setTo("AOA");
if (resultStr != tempStr) {
errln("FAIL: didn't return AOA for eo_AO");
}
- date = (UDate)819936000000.0;
- ucurr_forLocaleAndDate("eo_AO", date, TMP, 4, &status);
+ date = (UDate)819936000000.0; // 1996 - 2 currencies
+ index = ucurr_countCurrencies("eo_AO", date, &status);
+ if (index != 2)
+ {
+ errln("FAIL: didn't return 1 for eo_AO");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AO", date, 1, TMP, 4, &status);
tempStr.setTo(TMP);
resultStr.setTo("AOR");
if (resultStr != tempStr) {
errln("FAIL: didn't return AOR for eo_AO");
}
-
- date = (UDate)662256000000.0;
- ucurr_forLocaleAndDate("eo_AO", date, TMP, 4, &status);
+ resLen = ucurr_forLocaleAndDate("eo_AO", date, 2, TMP, 4, &status);
tempStr.setTo(TMP);
resultStr.setTo("AON");
if (resultStr != tempStr) {
errln("FAIL: didn't return AON for eo_AO");
}
- date = (UDate)315360000000.0;
- ucurr_forLocaleAndDate("eo_AO", date, TMP, 4, &status);
+ date = (UDate)662256000000.0; // 1991 - 2 currencies
+ index = ucurr_countCurrencies("eo_AO", date, &status);
+ if (index != 2)
+ {
+ errln("FAIL: didn't return 1 for eo_AO");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AO", date, 1, TMP, 4, &status);
tempStr.setTo(TMP);
+ resultStr.setTo("AON");
+ if (resultStr != tempStr) {
+ errln("FAIL: didn't return AON for eo_AO");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AO", date, 2, TMP, 4, &status);
+ tempStr.setTo(TMP);
resultStr.setTo("AOK");
if (resultStr != tempStr) {
errln("FAIL: didn't return AOK for eo_AO");
}
- date = (UDate)0.0;
- ucurr_forLocaleAndDate("eo_AO", date, TMP, 4, &status);
+ date = (UDate)315360000000.0; // 1980 - one currency
+ index = ucurr_countCurrencies("eo_AO", date, &status);
+ if (index != 1)
+ {
+ errln("FAIL: didn't return 1 for eo_AO");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AO", date, 1, TMP, 4, &status);
tempStr.setTo(TMP);
- resultStr.setTo("AOA");
+ resultStr.setTo("AOK");
if (resultStr != tempStr) {
- errln("FAIL: didn't return AOA for eo_AO");
+ errln("FAIL: didn't return AOK for eo_AO");
}
+ date = (UDate)0.0; // 1970 - no currencies
+ index = ucurr_countCurrencies("eo_AO", date, &status);
+ if (index != 0)
+ {
+ errln("FAIL: didn't return 1 for eo_AO");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_AO", date, 1, TMP, 4, &status);
+ if (resLen != 0) {
+ errln("FAIL: eo_AO didn't return NULL");
+ }
+ status = U_ZERO_ERROR;
+
// Test with currency keyword override
- ucurr_forLocaleAndDate("eo_DE at currency=DEM", date, TMP, 4, &status);
+ date = (UDate)977616000000.0; // 2001 - two currencies
+ index = ucurr_countCurrencies("eo_DE at currency=DEM", date, &status);
+ if (index != 2)
+ {
+ errln("FAIL: didn't return 2 for eo_DE at currency=DEM");
+ }
+ resLen = ucurr_forLocaleAndDate("eo_DE at currency=DEM", date, 1, TMP, 4, &status);
tempStr.setTo(TMP);
- resultStr.setTo("DEM");
+ resultStr.setTo("EUR");
if (resultStr != tempStr) {
- errln("FAIL: didn't return DEM for eo_DE at currency=DEM");
+ errln("FAIL: didn't return EUR for eo_DE at currency=DEM");
}
-
- ucurr_forLocaleAndDate("fr_FR at currency=FRF", date, TMP, 4, &status);
+ resLen = ucurr_forLocaleAndDate("eo_DE at currency=DEM", date, 2, TMP, 4, &status);
tempStr.setTo(TMP);
- resultStr.setTo("FRF");
+ resultStr.setTo("DEM");
if (resultStr != tempStr) {
- errln("FAIL: didn't return FRF for fr_FR at currency=FRF");
+ errln("FAIL: didn't return DEM for eo_DE at currency=DEM");
}
// Test Euro Support
@@ -2265,82 +2434,23 @@
date = uprv_getUTCtime();
UChar USD[4];
- ucurr_forLocaleAndDate("en_US", date, USD, 4, &status);
+ ucurr_forLocaleAndDate("en_US", date, 1, USD, 4, &status);
UChar YEN[4];
- ucurr_forLocaleAndDate("ja_JP", date, YEN, 4, &status);
+ ucurr_forLocaleAndDate("ja_JP", date, 1, YEN, 4, &status);
- ucurr_forLocaleAndDate("en_US", date, TMP, 4, &status);
+ ucurr_forLocaleAndDate("en_US", date, 1, TMP, 4, &status);
if (u_strcmp(USD, TMP) != 0) {
errln("Fail: en_US didn't return USD");
}
- ucurr_forLocaleAndDate("en_US_PREEURO", date, TMP, 4, &status);
+ ucurr_forLocaleAndDate("en_US_PREEURO", date, 1, TMP, 4, &status);
if (u_strcmp(USD, TMP) != 0) {
errln("Fail: en_US_PREEURO didn't fallback to en_US");
}
- ucurr_forLocaleAndDate("en_US_Q", date, TMP, 4, &status);
+ ucurr_forLocaleAndDate("en_US_Q", date, 1, TMP, 4, &status);
if (u_strcmp(USD, TMP) != 0) {
errln("Fail: en_US_Q didn't fallback to en_US");
}
- int32_t invalidLen = ucurr_forLocaleAndDate("en_QQ", date, TMP, 4, &status);
- if (invalidLen || U_SUCCESS(status)) {
- errln("Fail: en_QQ didn't return NULL");
- }
status = U_ZERO_ERROR; // reset
- // Test currency registration
-#if !UCONFIG_NO_SERVICE
-
- static const UChar QQQ[] = {0x51, 0x51, 0x51, 0};
- if(U_FAILURE(status)) {
- errln("Unable to get currency for locale, error %s", u_errorName(status));
- return;
- }
-
- UCurrRegistryKey enkey = ucurr_register(YEN, "en_US", &status);
- UCurrRegistryKey enUSEUROkey = ucurr_register(QQQ, "en_US_EURO", &status);
-
- ucurr_forLocaleAndDate("en_US", date, TMP, 4, &status);
- if (u_strcmp(YEN, TMP) != 0) {
- errln("FAIL: didn't return YEN registered for en_US");
- }
-
- ucurr_forLocaleAndDate("en_US_EURO", date, TMP, 4, &status);
- if (u_strcmp(QQQ, TMP) != 0) {
- errln("FAIL: didn't return QQQ for en_US_EURO");
- }
-
- int32_t fallbackLen = ucurr_forLocaleAndDate("en_XX_BAR", date, TMP, 4, &status);
- if (fallbackLen) {
- errln("FAIL: tried to fallback en_XX_BAR");
- }
- status = U_ZERO_ERROR; // reset
-
- if (!ucurr_unregister(enkey, &status)) {
- errln("FAIL: couldn't unregister enkey");
- }
-
- ucurr_forLocaleAndDate("en_US", date, TMP, 4, &status);
- if (u_strcmp(USD, TMP) != 0) {
- errln("FAIL: didn't return USD for en_US after unregister of en_US");
- }
- status = U_ZERO_ERROR; // reset
-
- ucurr_forLocaleAndDate("en_US_EURO", date, TMP, 4, &status);
- if (u_strcmp(QQQ, TMP) != 0) {
- errln("FAIL: didn't return QQQ for en_US_EURO after unregister of en_US");
- }
-
- ucurr_forLocaleAndDate("en_US_BLAH", date, TMP, 4, &status);
- if (u_strcmp(USD, TMP) != 0) {
- errln("FAIL: could not find USD for en_US_BLAH after unregister of en");
- }
- status = U_ZERO_ERROR; // reset
-
- if (!ucurr_unregister(enUSEUROkey, &status)) {
- errln("FAIL: couldn't unregister enUSEUROkey");
- }
-
- status = U_ZERO_ERROR; // reset
-#endif
}
Added: trunk/source/test/intltest/plurfmts.cpp
===================================================================
--- trunk/source/test/intltest/plurfmts.cpp (rev 0)
+++ trunk/source/test/intltest/plurfmts.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -0,0 +1,510 @@
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 2007-2008, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "plurults.h"
+#include "plurfmts.h"
+#include "cmemory.h"
+#include "unicode/plurrule.h"
+#include "unicode/plurfmt.h"
+
+
+const UnicodeString oddAndEvenRule = UNICODE_STRING_SIMPLE("odd: n mod 2 is 1");
+#define PLURAL_PATTERN_DATA 4
+#define PLURAL_TEST_ARRAY_SIZE 256
+
+#define PLURAL_SYNTAX_DATA 8
+
+// The value must be same as PLKeywordLookups[] order.
+#define PFT_ZERO 0
+#define PFT_ONE 1
+#define PFT_TWO 2
+#define PFT_FEW 3
+#define PFT_MANY 4
+#define PFT_OTHER 5
+// Test dispatcher for PluralFormatTest: selects the test case registered under 'index'.
+void PluralFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
+{
+ if (exec) logln("TestSuite PluralFormat"); // the suite banner is logged only when 'exec' is set
+ switch (index) {
+ TESTCASE(0, pluralFormatBasicTest); // TESTCASE is defined outside this file; presumably it sets 'name' and runs the test when 'exec' is set - TODO confirm against the intltest headers
+ TESTCASE(1, pluralFormatUnitTest);
+ TESTCASE(2, pluralFormatLocaleTest);
+ default: name = ""; // no test is registered under this index: hand back an empty name
+ break;
+ }
+}
+
+/**
+ * Test various generic API methods of PluralFormat for Basic usage.
+ *
+ * Part 1 builds a PluralFormat with each of the eight constructors and, for
+ * every one that reports success, compares its output with a default-locale
+ * NumberFormat through numberFormatTest().
+ * Part 2 exercises the assignment operator and the != operator.  (The error
+ * text of the first check mentions "clone", but no clone() call is made here.)
+ */
+void PluralFormatTest::pluralFormatBasicTest(/*char *par*/)
+{
+    UErrorCode status[8];
+    PluralFormat* plFmt[8];
+    Locale        locale = Locale::getDefault();
+    UnicodeString otherPattern = UnicodeString("other{#}");
+    UnicodeString message=UnicodeString("ERROR: PluralFormat basic test");
+
+    // ========= Test constructors
+    logln(" Testing PluralFormat constructors ...");
+    status[0] = U_ZERO_ERROR;
+    PluralRules*  plRules = PluralRules::createDefaultRules(status[0]);
+    if (U_FAILURE(status[0]) || plRules == NULL) {
+        // *plRules is dereferenced by several constructors below, so stop here
+        // instead of overwriting the status and crashing later.
+        dataerrln("ERROR: Could not create default PluralRules instance - exitting");
+        delete plRules;
+        return;
+    }
+
+    status[0] = U_ZERO_ERROR;
+    NumberFormat *numFmt = NumberFormat::createInstance(status[0]);
+    if (U_FAILURE(status[0])) {
+        // Not fatal: numberFormatTest() reports and returns when numFmt is NULL.
+        dataerrln("ERROR: Could not create NumberFormat instance with default locale ");
+    }
+
+    for (int32_t i=0; i< 8; ++i) {
+        status[i] = U_ZERO_ERROR;
+    }
+    plFmt[0] = new PluralFormat(status[0]);
+    plFmt[1] = new PluralFormat(*plRules, status[1]);
+    plFmt[2] = new PluralFormat(locale, status[2]);
+    plFmt[3] = new PluralFormat(locale, *plRules, status[3]);
+    plFmt[4] = new PluralFormat(otherPattern, status[4]);
+    plFmt[5] = new PluralFormat(*plRules, otherPattern, status[5]);
+    plFmt[6] = new PluralFormat(locale, otherPattern, status[6]);
+    plFmt[7] = new PluralFormat(locale, *plRules, otherPattern, status[7]);
+
+    for (int32_t i=0; i< 8; ++i) {
+        if (U_SUCCESS(status[i])) {
+            numberFormatTest(plFmt[i], numFmt, 1, 12, NULL, NULL, FALSE, &message);
+            numberFormatTest(plFmt[i], numFmt, 100, 112, NULL, NULL, FALSE, &message);
+        }
+        else {
+            dataerrln("ERROR: PluralFormat constructor failed!");
+        }
+        delete plFmt[i];
+    }
+
+    // ======= Test assignment operator && != operator.
+    // Start from clean statuses: the ones left over from the constructor loop
+    // may hold failures, and each object gets its own status slot.
+    for (int32_t i=0; i< 3; ++i) {
+        status[i] = U_ZERO_ERROR;
+    }
+    plFmt[0]= new PluralFormat(status[0]);
+    plFmt[1]= new PluralFormat(locale, status[1]);
+    plFmt[2]= new PluralFormat(locale, status[2]);
+    if ( (plFmt[0]==NULL) || (plFmt[1]==NULL) || (plFmt[2]==NULL) ||
+         U_FAILURE(status[0]) || U_FAILURE(status[1]) || U_FAILURE(status[2]) ) {
+        // Validate before any dereference; the objects are still freed below.
+        dataerrln("ERROR: PluralFormat constructor failed in assignment test!");
+    }
+    else {
+        *plFmt[1] = *plFmt[0];
+        if ( *plFmt[1] != *plFmt[0] ) {
+            errln("ERROR: clone plural format test failed!");
+        }
+        *plFmt[1] = *plFmt[2];
+        if ( *plFmt[1] != *plFmt[2] ) {
+            errln("ERROR: assignment operator test failed!");
+        }
+    }
+    delete plFmt[0];
+    delete plFmt[1];
+    delete plFmt[2];
+    delete numFmt;
+    delete plRules;
+}
+
+/**
+ * Unit tests of PluralFormat class.
+ *
+ * Using the file-level "odd: n mod 2 is 1" rule set, this test checks in order:
+ *   1. that malformed patterns are rejected by applyPattern(),
+ *   2. that several valid patterns format 1..10 as expected,
+ *   3. that setLocale() drops the previously applied pattern and rules,
+ *   4. the copy constructor together with the != operator.
+ * Every exit path releases the heap-allocated PluralRules and NumberFormat.
+ */
+void PluralFormatTest::pluralFormatUnitTest(/*char *par*/)
+{
+    UnicodeString patternTestData[PLURAL_PATTERN_DATA] = {
+        UNICODE_STRING_SIMPLE("odd {# is odd.} other{# is even.}"),
+        UNICODE_STRING_SIMPLE("other{# is odd or even.}"),
+        UNICODE_STRING_SIMPLE("odd{The number {0, number, #.#0} is odd.}other{The number {0, number, #.#0} is even.}"),
+        UNICODE_STRING_SIMPLE("odd{The number {#} is odd.}other{The number {#} is even.}"),
+    };
+    UnicodeString patternOddTestResult[PLURAL_PATTERN_DATA] = {
+        UNICODE_STRING_SIMPLE(" is odd."),
+        UNICODE_STRING_SIMPLE(" is odd or even."),
+        UNICODE_STRING_SIMPLE("The number {0, number, #.#0} is odd."),
+        UNICODE_STRING_SIMPLE("The number {#} is odd."),
+    };
+    UnicodeString patternEvenTestResult[PLURAL_PATTERN_DATA] = {
+        UNICODE_STRING_SIMPLE(" is even."),
+        UNICODE_STRING_SIMPLE(" is odd or even."),
+        UNICODE_STRING_SIMPLE("The number {0, number, #.#0} is even."),
+        UNICODE_STRING_SIMPLE("The number {#} is even."),
+    };
+    // Each of these patterns is expected to be rejected by applyPattern().
+    UnicodeString checkSyntaxtData[PLURAL_SYNTAX_DATA] = {
+        UNICODE_STRING_SIMPLE("odd{foo} odd{bar} other{foobar}"),
+        UNICODE_STRING_SIMPLE("odd{foo} other{bar} other{foobar}"),
+        UNICODE_STRING_SIMPLE("odd{foo}"),
+        UNICODE_STRING_SIMPLE("otto{foo} other{bar}"),
+        UNICODE_STRING_SIMPLE("1odd{foo} other{bar}"),
+        UNICODE_STRING_SIMPLE("odd{foo},other{bar}"),
+        UNICODE_STRING_SIMPLE("od d{foo} other{bar}"),
+        UNICODE_STRING_SIMPLE("odd{foo}{foobar}other{foo}"),
+    };
+
+    UErrorCode status = U_ZERO_ERROR;
+    PluralRules*  plRules = PluralRules::createRules(oddAndEvenRule, status);
+    if (U_FAILURE(status)) {
+        dataerrln("ERROR:  create PluralRules instance failed in unit tests.- exitting");
+        delete plRules;   // harmless when NULL
+        return;
+    }
+
+    // ======= Test PluralRules pattern syntax.
+    logln("Testing PluralRules pattern syntax.");
+    for (int32_t i=0; i<PLURAL_SYNTAX_DATA; ++i) {
+        status = U_ZERO_ERROR;
+
+        PluralFormat plFmt=PluralFormat(*plRules, status);
+        if (U_FAILURE(status)) {
+            dataerrln("ERROR:  PluralFormat constructor failed in unit tests.- exitting");
+            delete plRules;   // was leaked on this path
+            return;
+        }
+        plFmt.applyPattern(checkSyntaxtData[i], status);
+        if (U_SUCCESS(status)) {
+            errln("ERROR:  PluralFormat failed to detect syntax error with pattern: "+checkSyntaxtData[i]);
+        }
+    }
+
+
+
+    // ======= Test applying various pattern
+    logln("Testing various patterns");
+    status = U_ZERO_ERROR;
+    // TRUE: the expected text replaces the NumberFormat output entirely;
+    // FALSE: the expected text is appended to it (see numberFormatTest()).
+    UBool overwrite[PLURAL_PATTERN_DATA] = {FALSE, FALSE, TRUE, TRUE};
+
+    NumberFormat *numFmt = NumberFormat::createInstance(status);
+    UnicodeString message=UnicodeString("ERROR: PluralFormat tests various pattern ...");
+    if (U_FAILURE(status)) {
+        // Not fatal: numberFormatTest() reports and returns when numFmt is NULL.
+        dataerrln("ERROR: Could not create NumberFormat instance with default locale ");
+    }
+    for(int32_t i=0; i<PLURAL_PATTERN_DATA; ++i) {
+        status = U_ZERO_ERROR;
+        PluralFormat plFmt=PluralFormat(*plRules, status);
+        if (U_FAILURE(status)) {
+            dataerrln("ERROR:  PluralFormat constructor failed in unit tests.- exitting");
+            delete plRules;   // both were leaked on this path
+            delete numFmt;
+            return;
+        }
+        plFmt.applyPattern(patternTestData[i], status);
+        if (U_FAILURE(status)) {
+            errln("ERROR:  PluralFormat failed to apply pattern- "+patternTestData[i]);
+            continue;
+        }
+        numberFormatTest(&plFmt, numFmt, 1, 10, &patternOddTestResult[i],
+                         &patternEvenTestResult[i], overwrite[i], &message);
+    }
+    delete plRules;
+    delete numFmt;
+    plRules = NULL;
+    numFmt = NULL;
+
+    // ======= Test set locale
+    status = U_ZERO_ERROR;
+    plRules = PluralRules::createRules(UNICODE_STRING_SIMPLE("odd: n mod 2 is 1"), status);
+    if (U_FAILURE(status) || plRules == NULL) {
+        // *plRules is dereferenced on the next statement; do not continue without it.
+        dataerrln("ERROR: Could not create PluralRules instance in setLocale() test - exitting. ");
+        delete plRules;
+        return;
+    }
+    PluralFormat pluralFmt = PluralFormat(*plRules, status);
+    if (U_FAILURE(status)) {
+        dataerrln("ERROR: Could not create PluralFormat instance in setLocale() test - exitting. ");
+        delete plRules;
+        return;
+    }
+    pluralFmt.applyPattern(UNICODE_STRING_SIMPLE("odd{odd} other{even}"), status);
+    pluralFmt.setLocale(Locale::getEnglish(), status);
+    if (U_FAILURE(status)) {
+        dataerrln("ERROR: Could not setLocale() with English locale ");
+        delete plRules;
+        return;
+    }
+    message = UNICODE_STRING_SIMPLE("Error set locale: pattern is not reset!");
+
+    // Check that pattern gets deleted.
+    logln("\n Test setLocale() ..\n");
+    numFmt = NumberFormat::createInstance(Locale::getEnglish(), status);
+    if (U_FAILURE(status)) {
+        dataerrln("ERROR: Could not create NumberFormat instance with English locale ");
+    }
+    numberFormatTest(&pluralFmt, numFmt, 5, 5, NULL, NULL, FALSE, &message);
+    // "odd__" is not expected to be a keyword of the English rules, so this must fail.
+    pluralFmt.applyPattern(UNICODE_STRING_SIMPLE("odd__{odd} other{even}"), status);
+    if (U_SUCCESS(status)) {
+        errln("SetLocale should reset rules but did not.");
+    }
+    status = U_ZERO_ERROR;
+    pluralFmt.applyPattern(UNICODE_STRING_SIMPLE("one{one} other{not one}"), status);
+    if (U_FAILURE(status)) {
+        errln("SetLocale should reset rules but did not.");
+    }
+    UnicodeString one = UNICODE_STRING_SIMPLE("one");
+    UnicodeString notOne = UNICODE_STRING_SIMPLE("not one");
+    UnicodeString plResult, numResult;
+    for (int32_t i=0; i<20; ++i) {
+        plResult = pluralFmt.format(i, status);
+        // After setLocale(English) only the value 1 is expected to select "one".
+        if ( i==1 ) {
+            numResult = one;
+        }
+        else {
+            numResult = notOne;
+        }
+        if ( numResult != plResult ) {
+            errln("Wrong ruleset loaded by setLocale() - got:"+plResult+ UnicodeString("  expecting:")+numResult);
+        }
+    }
+
+    // =========== Test copy constructor
+    logln("Test copy constructor and == operator of PluralFormat");
+    PluralFormat dupPFmt = PluralFormat(pluralFmt);
+    if (pluralFmt != dupPFmt) {
+        errln("Failed in PluralFormat copy constructor or == operator");
+    }
+
+    delete plRules;
+    delete numFmt;
+}
+
+
+
+/**
+ * Test locale data used in PluralFormat class.
+ *
+ * For each family of locales a table of expected results is filled in and
+ * handed to helperTestRusults().  pluralResults[n] holds the PFT_* keyword
+ * index expected when formatting the number n; entries left at -1 are not
+ * checked.
+ */
+void
+PluralFormatTest::pluralFormatLocaleTest(/*char *par*/)
+{
+    int8_t pluralResults[PLURAL_TEST_ARRAY_SIZE];  // indexed by the number to format; -1 = skip
+
+    // ======= Test DefaultRule
+    logln("Testing PluralRules with no rule.");
+    const char* oneRuleLocales[4] = {"ja", "ko", "tr", "vi"};
+    UnicodeString testPattern = UNICODE_STRING_SIMPLE("other{other}");
+    uprv_memset(pluralResults, -1, sizeof(pluralResults));
+    pluralResults[0]= PFT_OTHER; // other
+    helperTestRusults(oneRuleLocales, 4, testPattern, pluralResults);
+
+    // ====== Test Singular1 locales.
+    logln("Testing singular1 locales.");
+    // 18 locale names.  The array was previously declared with 19 elements and
+    // passed with a count of 19, which left a trailing NULL entry to be tested.
+    const char* singular1Locales[18] = {"da","de","el","en","eo","es","et","fi",
+                                        "fo","he","hu","it","nb","nl","nn","no","pt","sv"};
+    testPattern = UNICODE_STRING_SIMPLE("one{one} other{other}");
+    uprv_memset(pluralResults, -1, sizeof(pluralResults));
+    pluralResults[0]= PFT_OTHER;
+    pluralResults[1]= PFT_ONE;
+    pluralResults[2]= PFT_OTHER;
+    helperTestRusults(singular1Locales, 18, testPattern, pluralResults);
+
+    // ======== Test Singular01 locales.
+    logln("Testing singular01 locales.");
+    const char* singular01Locales[2] = {"fr","pt_BR"};
+    testPattern = UNICODE_STRING_SIMPLE("one{one} other{other}");
+    uprv_memset(pluralResults, -1, sizeof(pluralResults));
+    pluralResults[0]= PFT_ONE;
+    pluralResults[2]= PFT_OTHER;
+    helperTestRusults(singular01Locales, 2, testPattern, pluralResults);
+
+    // ======== Test ZeroSingular locales.
+    logln("Testing zero singular locales.");
+    const char* zeroSingularLocales[1] = {"lv"};
+    testPattern = UNICODE_STRING_SIMPLE("zero{zero} one{one} other{other}");
+    uprv_memset(pluralResults, -1, sizeof(pluralResults));
+    pluralResults[0]= PFT_ZERO;
+    pluralResults[1]= PFT_ONE;
+    pluralResults[2]= PFT_OTHER;
+    for (int32_t i=2; i<20; ++i) {
+        if (i==11)  continue;   // leaves 111 and 112 unchecked
+        pluralResults[i*10+1] = PFT_ONE;
+        pluralResults[i*10+2] = PFT_OTHER;
+    }
+    helperTestRusults(zeroSingularLocales, 1, testPattern, pluralResults);
+
+    // ======== Test singular dual locales.
+    logln("Testing singular dual locales.");
+    const char* singularDualLocales[1] = {"ga"};
+    testPattern = UNICODE_STRING_SIMPLE("one{one} two{two} other{other}");
+    uprv_memset(pluralResults, -1, sizeof(pluralResults));
+    pluralResults[0]= PFT_OTHER;
+    pluralResults[1]= PFT_ONE;
+    pluralResults[2]= PFT_TWO;
+    pluralResults[3]= PFT_OTHER;
+    helperTestRusults(singularDualLocales, 1, testPattern, pluralResults);
+
+    // ======== Test Singular Zero Some locales.
+    logln("Testing singular zero some locales.");
+    const char* singularZeroSomeLocales[1] = {"ro"};
+    testPattern = UNICODE_STRING_SIMPLE("zero{zero} one{one} other{other}");
+    uprv_memset(pluralResults, -1, sizeof(pluralResults));
+    pluralResults[0]= PFT_ZERO;
+    pluralResults[1]= PFT_ONE;
+    pluralResults[2]= PFT_ZERO;
+    pluralResults[20]= PFT_OTHER;
+    pluralResults[101]= PFT_ZERO;
+    pluralResults[120]= PFT_OTHER;
+    helperTestRusults(singularZeroSomeLocales, 1, testPattern, pluralResults);
+
+    // ======== Test Special 12/19.
+    logln("Testing special 12 and 19.");
+    const char* special12_19Locales[1] = {"lt"};
+    testPattern = UNICODE_STRING_SIMPLE("one{one} few{few} other{other}");
+    uprv_memset(pluralResults, -1, sizeof(pluralResults));
+    pluralResults[0]= PFT_OTHER;
+    pluralResults[1]= PFT_ONE;
+    pluralResults[2]= PFT_FEW;
+    pluralResults[10]= PFT_OTHER;
+    for (int32_t i=2; i<20; ++i) {
+        if (i==11)  continue;   // leaves 111, 112 and 120 unchecked
+        pluralResults[i*10+1] = PFT_ONE;
+        pluralResults[i*10+2] = PFT_FEW;
+        pluralResults[(i+1)*10] = PFT_OTHER;   // at most index 200, inside the 256-entry table
+    }
+    helperTestRusults(special12_19Locales, 1, testPattern, pluralResults);
+
+    // ======== Test Paucal Except 11 14.
+    logln("Testing Paucal Except 11 and 14.");
+    const char* paucal01Locales[4] = {"hr","ru","sr","uk"};
+    testPattern = UNICODE_STRING_SIMPLE("one{one} few{few} other{other}");
+    uprv_memset(pluralResults, -1, sizeof(pluralResults));
+    pluralResults[0]= PFT_OTHER;
+    pluralResults[1]= PFT_ONE;
+    pluralResults[2]= PFT_FEW;
+    pluralResults[5]= PFT_OTHER;
+    for (int32_t i=2; i<20; ++i) {
+        if (i==11)  continue;   // leaves 111, 112 and 115 unchecked
+        pluralResults[i*10+1] = PFT_ONE;
+        pluralResults[i*10+2] = PFT_FEW;
+        pluralResults[i*10+5] = PFT_OTHER;
+    }
+    helperTestRusults(paucal01Locales, 4, testPattern, pluralResults);
+
+    // ======== Test Singular Paucal.
+    logln("Testing Singular Paucal.");
+    const char* singularPaucalLocales[2] = {"cs","sk"};
+    testPattern = UNICODE_STRING_SIMPLE("one{one} few{few} other{other}");
+    uprv_memset(pluralResults, -1, sizeof(pluralResults));
+    pluralResults[0]= PFT_OTHER;
+    pluralResults[1]= PFT_ONE;
+    pluralResults[2]= PFT_FEW;
+    pluralResults[5]= PFT_OTHER;
+    helperTestRusults(singularPaucalLocales, 2, testPattern, pluralResults);
+
+    // ======== Test Paucal (1), (2,3,4).
+    logln("Testing Paucal (1), (2,3,4).");
+    const char* paucal02Locales[1] = {"pl"};
+    testPattern = UNICODE_STRING_SIMPLE("one{one} few{few} other{other}");
+    uprv_memset(pluralResults, -1, sizeof(pluralResults));
+    pluralResults[0]= PFT_OTHER;
+    pluralResults[1]= PFT_ONE;
+    pluralResults[2]= PFT_FEW;
+    pluralResults[5]= PFT_OTHER;
+    for (int32_t i=2; i<20; ++i) {
+        if (i==11)  continue;   // leaves 112 and 115 unchecked
+        pluralResults[i*10+2] = PFT_FEW;
+        pluralResults[i*10+5] = PFT_OTHER;
+    }
+    helperTestRusults(paucal02Locales, 1, testPattern, pluralResults);
+
+    // ======== Test Paucal (1), (2), (3,4).
+    logln("Testing Paucal (1), (2), (3,4).");
+    const char* paucal03Locales[1] = {"sl"};
+    testPattern = UNICODE_STRING_SIMPLE("one{one} two{two} few{few} other{other}");
+    uprv_memset(pluralResults, -1, sizeof(pluralResults));
+    pluralResults[0]= PFT_OTHER;
+    pluralResults[1]= PFT_ONE;
+    pluralResults[2]= PFT_TWO;
+    pluralResults[3]= PFT_FEW;
+    pluralResults[5]= PFT_OTHER;
+    pluralResults[101]= PFT_ONE;
+    pluralResults[102]= PFT_TWO;
+    pluralResults[103]= PFT_FEW;
+    pluralResults[105]= PFT_OTHER;
+    helperTestRusults(paucal03Locales, 1, testPattern, pluralResults);
+}
+
+/**
+ * Formats every integer in [start, end] with both plFmt and numFmt and
+ * reports a test error when the PluralFormat output differs from the
+ * expected text or the error code indicates failure.
+ *
+ * The expected text starts as the NumberFormat output.  When both
+ * numOddAppendStr and numEvenAppendStr are non-NULL, the string matching the
+ * parity of the value is either appended to it or, if overwrite is set,
+ * replaces it.  A non-NULL message is used as the prefix of the error text.
+ * If either formatter is NULL the problem is reported and nothing is tested.
+ */
+void
+PluralFormatTest::numberFormatTest(PluralFormat* plFmt,
+                                   NumberFormat *numFmt,
+                                   int32_t start,
+                                   int32_t end,
+                                   UnicodeString *numOddAppendStr,
+                                   UnicodeString *numEvenAppendStr,
+                                   UBool overwrite,  // overwrite the numberFormat.format result
+                                   UnicodeString *message) {
+    if ( (plFmt==NULL) || (numFmt==NULL) ) {
+        dataerrln("ERROR: Could not create PluralFormat or NumberFormat - exitting");
+        return;
+    }
+
+    const UBool haveParityText = (numOddAppendStr!= NULL)&&(numEvenAppendStr!=NULL);
+    UErrorCode ec = U_ZERO_ERROR;   // shared by all iterations; not reset inside the loop
+    UnicodeString actual, expected;
+
+    for (int32_t value=start; value<= end; ++value ) {
+        expected.remove();
+        expected = numFmt->format(value, expected);
+        actual = plFmt->format(value, ec);
+
+        if (haveParityText) {
+            // Odd values use the "odd" text, even values the "even" text.
+            const UnicodeString &parityText = (value&1) ? *numOddAppendStr : *numEvenAppendStr;
+            if (overwrite) {
+                expected = parityText;
+            }
+            else {  // Append the string
+                expected += parityText;
+            }
+        }
+
+        const UBool mismatch = (expected!=actual) || U_FAILURE(ec);
+        if (!mismatch) {
+            continue;
+        }
+        if ( message == NULL ) {
+            errln("ERROR: Unexpected plural format - got:"+actual+ UnicodeString(" expecting:")+expected);
+        }
+        else {
+            errln( *message+UnicodeString(" got:")+actual+UnicodeString(" expecting:")+expected);
+        }
+    }
+}
+
+
+/**
+ * Checks the plural keyword chosen for a set of numbers in several locales.
+ *
+ * @param localeArray      locale names to test
+ * @param capacityOfArray  number of entries of localeArray to use
+ * @param testPattern      pattern applied to each locale's PluralFormat; the
+ *                         callers in this file use patterns whose messages
+ *                         are the keyword names themselves
+ * @param expResults       table of PLURAL_TEST_ARRAY_SIZE entries indexed by
+ *                         the number to format: the PFT_* index of the
+ *                         expected keyword, or -1 to skip that number
+ *
+ * A locale whose PluralFormat cannot be constructed is reported and skipped.
+ */
+void
+PluralFormatTest::helperTestRusults(const char** localeArray,
+                                    int32_t capacityOfArray,
+                                    UnicodeString& testPattern,
+                                    int8_t *expResults) {
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString plResult;
+    const int32_t keywordCount = 6;
+    // Order must match the PFT_* constants.
+    const UnicodeString PLKeywordLookups[6] = {
+        UNICODE_STRING_SIMPLE("zero"),
+        UNICODE_STRING_SIMPLE("one"),
+        UNICODE_STRING_SIMPLE("two"),
+        UNICODE_STRING_SIMPLE("few"),
+        UNICODE_STRING_SIMPLE("many"),
+        UNICODE_STRING_SIMPLE("other"),
+    };
+
+    for (int32_t i=0; i<capacityOfArray; ++i) {
+        const char *locale = localeArray[i];
+        Locale ulocale((const char *)locale);
+        status = U_ZERO_ERROR;
+        PluralFormat plFmt(ulocale, testPattern, status);
+        if (U_FAILURE(status)) {
+            errln("Failed to apply pattern to locale:"+UnicodeString(localeArray[i]));
+            continue;
+        }
+        for (int32_t n=0; n<PLURAL_TEST_ARRAY_SIZE; ++n) {
+            const int32_t expected = expResults[n];
+            if (expected==-1) {
+                continue;   // no expectation recorded for this number
+            }
+            if ( (expected<0) || (expected>=keywordCount) ) {
+                // Guard the lookup table against a corrupt expectation entry.
+                errln("ERROR: Invalid expected keyword index in locale data tests with locale: "+
+                      UnicodeString(localeArray[i]));
+                continue;
+            }
+            status = U_ZERO_ERROR;
+            plResult = plFmt.format(n, status);
+            if (U_FAILURE(status)) {
+                errln("ERROR: Failed to format number in locale data tests with locale: "+
+                      UnicodeString(localeArray[i]));
+                continue;   // the result is meaningless; do not also report a mismatch
+            }
+            if (plResult != PLKeywordLookups[expected]){
+                errln("ERROR: Unexpected format result in locale: "+UnicodeString(localeArray[i])+
+                      UnicodeString(" got:")+plResult+ UnicodeString("  expecting:")+
+                      PLKeywordLookups[expected]);
+            }
+        }
+    }
+}
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
Added: trunk/source/test/intltest/plurults.cpp
===================================================================
--- trunk/source/test/intltest/plurults.cpp (rev 0)
+++ trunk/source/test/intltest/plurults.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -0,0 +1,290 @@
+/*
+*******************************************************************************
+* Copyright (C) 2007-2008, International Business Machines Corporation and
+* others. All Rights Reserved.
+********************************************************************************
+
+* File PLURRULTS.cpp
+*
+********************************************************************************
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "plurults.h"
+#include "unicode/plurrule.h"
+
+
+
+// File-local helpers, defined after testAPI() below.
+// setupResult: expands a zero-terminated list of numbers into a table holding
+// 'a' at each listed index and 'o' elsewhere; *max receives the table length.
+void setupResult(const int32_t testSource[], char result[], int32_t* max);
+// checkEqual: TRUE if the first char of test->select(i) equals result[i] for all i < max.
+UBool checkEqual(PluralRules *test, char *result, int32_t max);
+// testEquality: TRUE if differently written but equivalent rule sets select the same keywords.
+UBool testEquality(PluralRules *test);
+
+// This is an API test, not a unit test. It doesn't test very many cases, and doesn't
+// try to test the full functionality. It just calls each function in the class and
+// verifies that it works on a basic level.
+
+// Test dispatcher: index 0 maps to testAPI (via the TESTCASE macro); any other
+// index reports an empty name.
+void PluralRulesTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
+{
+    if (exec) {
+        logln("TestSuite PluralRulesAPI");
+    }
+
+    switch (index) {
+        TESTCASE(0, testAPI);
+        default:
+            name = "";
+            break;
+    }
+}
+
+#define PLURAL_TEST_NUM 13
+/**
+ * Test various generic API methods of PluralRules for API coverage.
+ *
+ * Covers, in order: construction and forLocale(), clone/assignment/inequality,
+ * a rule with an empty condition, a table of simple rules, one multi-keyword
+ * rule set, testEquality(), the class-ID accessors, and fallback to the parent
+ * locale. Every object created here is released on all exit paths.
+ */
+void PluralRulesTest::testAPI(/*char *par*/)
+{
+    // The last entry is an empty placeholder; only the first
+    // PLURAL_TEST_NUM-1 rules are exercised below.
+    UnicodeString pluralTestData[PLURAL_TEST_NUM] = {
+        UNICODE_STRING_SIMPLE("a: n is 1"),
+        UNICODE_STRING_SIMPLE("a: n mod 10 is 2"),
+        UNICODE_STRING_SIMPLE("a: n is not 1"),
+        UNICODE_STRING_SIMPLE("a: n mod 3 is not 1"),
+        UNICODE_STRING_SIMPLE("a: n in 2..5"),
+        UNICODE_STRING_SIMPLE("a: n not in 2..5"),
+        UNICODE_STRING_SIMPLE("a: n mod 10 in 2..5"),
+        UNICODE_STRING_SIMPLE("a: n mod 10 is 2 and n is not 12"),
+        UNICODE_STRING_SIMPLE("a: n mod 10 in 2..3 or n mod 10 is 5"),
+        UNICODE_STRING_SIMPLE("a: n is 1 or n is 4 or n is 23"),
+        UNICODE_STRING_SIMPLE("a: n mod 2 is 1 and n is not 3 and n in 1..11"),
+        UNICODE_STRING_SIMPLE("a: n mod 2 is 1 or n mod 5 is 1 and n is not 6"),
+        "",
+    };
+    // For each rule above: the numbers expected to select keyword "a",
+    // terminated by a (non-leading) 0. Consumed by setupResult().
+    static const int32_t pluralTestResult[PLURAL_TEST_NUM][30] = {
+        {1, 0},
+        {2,12,22, 0},
+        {0,2,3,4,5,0},
+        {0,2,3,5,6,8,9,0},
+        {2,3,4,5,0},
+        {0,1,6,7,8, 0},
+        {2,3,4,5,12,13,14,15,22,23,24,25,0},
+        {2,22,32,42,0},
+        {2,3,5,12,13,15,22,23,25,0},
+        {1,4,23,0},
+        {1,5,7,9,11,0},
+        {1,3,5,7,9,11,13,15,16,0},
+    };
+    UErrorCode status = U_ZERO_ERROR;
+
+    // ======= Test constructors
+    logln("Testing PluralRules constructors");
+
+
+    logln("\n start default locale test case ..\n");
+
+    PluralRules defRule(status);
+    PluralRules* test=new PluralRules(status);
+    PluralRules* newEnPlural= test->forLocale(Locale::getEnglish(), status);
+    if(U_FAILURE(status)) {
+        dataerrln("ERROR: Could not create PluralRules (default) - exitting");
+        delete newEnPlural;
+        delete test;
+        return;
+    }
+
+    // ======= Test clone, assignment operator && == operator.
+    PluralRules *dupRule = defRule.clone();
+    if (dupRule == NULL) {
+        errln("ERROR: clone plural rules test failed!");
+    }
+    else {
+        if ( *dupRule != defRule ) {
+            errln("ERROR: clone plural rules test failed!");
+        }
+        if (newEnPlural != NULL) {
+            // After assignment the copy must compare equal to its source.
+            *dupRule = *newEnPlural;
+            if ( *dupRule != *newEnPlural ) {
+                errln("ERROR: clone plural rules test failed!");
+            }
+        }
+        delete dupRule;
+    }
+
+    delete newEnPlural;
+
+    // ======= Test empty plural rules
+    logln("Testing Simple PluralRules");
+
+    PluralRules* empRule = test->createRules(UNICODE_STRING_SIMPLE("a:n"), status);
+    if (empRule == NULL) {
+        errln("ERROR: empty plural rules test failed! - exitting");
+    }
+    else {
+        UnicodeString key;
+        for (int32_t i=0; i<10; ++i) {
+            key = empRule->select(i);
+            if ( key.charAt(0)!= 0x61 ) { // 'a'
+                errln("ERROR: empty plural rules test failed! - exitting");
+            }
+        }
+        delete empRule;
+    }
+
+    // ======= Test simple plural rules
+    logln("Testing Simple PluralRules");
+
+    char result[100];
+    int32_t max;
+
+    for (int32_t i=0; i<PLURAL_TEST_NUM-1; ++i) {
+        PluralRules *newRules = test->createRules(pluralTestData[i], status);
+        setupResult(pluralTestResult[i], result, &max);
+        if ( newRules == NULL || !checkEqual(newRules, result, max) ) {
+            errln("ERROR: simple plural rules failed! - exitting");
+            delete newRules;
+            delete test;
+            return;
+        }
+        delete newRules;
+    }
+
+
+    // ======= Test complex plural rules
+    logln("Testing Complex PluralRules");
+    // TODO: the complex test data is hard coded. It's better to implement
+    // a parser to parse the test data.
+    UnicodeString complexRule = UNICODE_STRING_SIMPLE("a: n in 2..5; b: n in 5..8; c: n mod 2 is 1");
+    // Expected first letter of the selected keyword for n = 0 .. 11.
+    char cRuleResult[] =
+    {
+        0x6F, // 'o'
+        0x63, // 'c'
+        0x61, // 'a'
+        0x61, // 'a'
+        0x61, // 'a'
+        0x61, // 'a'
+        0x62, // 'b'
+        0x62, // 'b'
+        0x62, // 'b'
+        0x63, // 'c'
+        0x6F, // 'o'
+        0x63  // 'c'
+    };
+    PluralRules *newRules = test->createRules(complexRule, status);
+    if ( newRules == NULL || !checkEqual(newRules, cRuleResult, 12) ) {
+        errln("ERROR: complex plural rules failed! - exitting");
+        delete newRules;
+        delete test;
+        return;
+    }
+    delete newRules;
+    newRules=NULL;
+
+    // ======= Test Equality
+    logln("Testing Equality of PluralRules");
+
+
+    if ( !testEquality(test) ) {
+        errln("ERROR: plural rules equality test failed! - exitting");
+        delete test;
+        return;
+    }
+
+
+    // ======= Test getStaticClassID()
+    logln("Testing getStaticClassID()");
+
+    if(test->getDynamicClassID() != PluralRules::getStaticClassID()) {
+        errln("ERROR: getDynamicClassID() didn't return the expected value");
+    }
+    // ====== Test fallback to parent locale
+    PluralRules *en_UK = test->forLocale(Locale::getUK(), status);
+    PluralRules *en = test->forLocale(Locale::getEnglish(), status);
+    if (en_UK != NULL && en != NULL) {
+        if ( *en_UK != *en ) {
+            errln("ERROR: test locale fallback failed!");
+        }
+    }
+    delete en;
+    delete en_UK;
+
+    PluralRules *zh_Hant = test->forLocale(Locale::getTaiwan(), status);
+    PluralRules *zh = test->forLocale(Locale::getChinese(), status);
+    if (zh_Hant != NULL && zh != NULL) {
+        if ( *zh_Hant != *zh ) {
+            errln("ERROR: test locale fallback failed!");
+        }
+    }
+    delete zh_Hant;
+    delete zh;
+    delete test;
+}
+
+// Expands testSource (numbers in ascending order, ended by a non-leading
+// value <= 0) into result[]: 'a' at every listed index, 'o' in the gaps.
+// *max receives the number of entries written. The caller must supply a
+// result buffer large enough for the largest listed number plus one.
+void setupResult(const int32_t testSource[], char result[], int32_t* max) {
+    int32_t written = 0;
+    int32_t srcPos = 0;
+
+    for (;;) {
+        // Fill the gap up to the next listed number with 'o' (other).
+        for (; written < testSource[srcPos]; ++written) {
+            result[written] = 0x6F; // 'o'
+        }
+        result[written++] = 0x61;   // 'a'
+
+        // The first entry is always consumed, so a leading 0 is a valid number.
+        if (testSource[++srcPos] <= 0) {
+            break;
+        }
+    }
+    *max = written;
+}
+
+
+// Returns TRUE when, for every i in [0, max), the first character of the
+// keyword chosen by test->select(i) equals result[i].
+// A NULL rule set (e.g. a failed createRules()) or NULL table yields FALSE
+// instead of a crash.
+UBool checkEqual(PluralRules *test, char *result, int32_t max) {
+    if (test == NULL || result == NULL) {
+        return FALSE;
+    }
+    UnicodeString key;
+    for (int32_t i=0; i<max; ++i) {
+        key= test->select(i);
+        if ( key.charAt(0)!=result[i] ) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+#define MAX_EQ_ROW 2
+#define MAX_EQ_COL 5
+// Each row of testEquRules holds differently written rule sets that are meant
+// to be equivalent (the row ends at the first empty string). Returns TRUE if,
+// for every row, all of its rule sets select the same keyword for each n in
+// [0, 300). Returns FALSE on the first mismatch or if a rule set cannot be
+// created. 'test' is only used to reach createRules().
+UBool testEquality(PluralRules *test) {
+    UnicodeString testEquRules[MAX_EQ_ROW][MAX_EQ_COL] = {
+        { UNICODE_STRING_SIMPLE("a: n in 2..3"),
+          UNICODE_STRING_SIMPLE("a: n is 2 or n is 3"),
+          UNICODE_STRING_SIMPLE( "a:n is 3 and n in 2..5 or n is 2"),
+          "",
+        },
+        { UNICODE_STRING_SIMPLE("a: n is 12; b:n mod 10 in 2..3"),
+          UNICODE_STRING_SIMPLE("b: n mod 10 in 2..3 and n is not 12; a: n in 12..12"),
+          UNICODE_STRING_SIMPLE("b: n is 13; a: n in 12..13; b: n mod 10 is 2 or n mod 10 is 3"),
+          "",
+        }
+    };
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString key[MAX_EQ_COL];
+    UBool ret=TRUE;
+    for (int32_t i=0; i<MAX_EQ_ROW; ++i) {
+        PluralRules* rules[MAX_EQ_COL];
+
+        for (int32_t j=0; j<MAX_EQ_COL; ++j) {
+            rules[j]=NULL;
+        }
+        int32_t totalRules=0;
+        while((totalRules<MAX_EQ_COL) && (testEquRules[i][totalRules].length()>0) ) {
+            rules[totalRules]=test->createRules(testEquRules[i][totalRules], status);
+            if (rules[totalRules]==NULL) {
+                // Cannot compare a rule set that was never built.
+                ret= FALSE;
+                break;
+            }
+            totalRules++;
+        }
+        // Once ret is FALSE the comparison is skipped; only cleanup remains.
+        for (int32_t n=0; n<300 && ret ; ++n) {
+            for(int32_t j=0; j<totalRules;++j) {
+                key[j] = rules[j]->select(n);
+            }
+            for(int32_t j=0; j<totalRules-1;++j) {
+                if (key[j]!=key[j+1]) {
+                    ret= FALSE;
+                    break;
+                }
+            }
+
+        }
+        for (int32_t j=0; j<MAX_EQ_COL; ++j) {
+            delete rules[j];
+        }
+    }
+
+    return ret;
+}
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
Modified: trunk/source/test/intltest/srchtest.cpp
===================================================================
--- trunk/source/test/intltest/srchtest.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/intltest/srchtest.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
/*
*****************************************************************************
-* Copyright (C) 2001-2006, International Business Machines orporation
+* Copyright (C) 2001-2008, International Business Machines orporation
* and others. All Rights Reserved.
****************************************************************************/
@@ -154,7 +154,7 @@
CASE(33, TestUClassID)
CASE(34, TestSubclass)
CASE(35, TestCoverage)
- CASE(36, TestDiactricMatch)
+ CASE(36, TestDiacriticMatch)
default: name = ""; break;
}
}
@@ -256,8 +256,8 @@
char *str = toCharString(strsrch->getText());
errln("Text: %s", str);
str = toCharString(strsrch->getPattern());
- errln("Pattern: %s", str);
- errln("Error following match found at %d %d",
+ infoln("Pattern: %s", str);
+ infoln("Error following match found at %d %d",
strsrch->getMatchedStart(), strsrch->getMatchedLength());
return FALSE;
}
@@ -375,7 +375,7 @@
if( strsrch2 == strsrch || *strsrch2 != *strsrch ||
!assertEqualWithStringSearch(strsrch2, search)
) {
- errln("failure with StringSearch.clone()");
+ infoln("failure with StringSearch.clone()");
collator->setStrength(getECollationStrength(UCOL_TERTIARY));
delete strsrch;
delete strsrch2;
@@ -395,6 +395,7 @@
BreakIterator *breaker = getBreakIterator(search->breaker);
StringSearch *strsrch;
UChar temp[128];
+ UBool result = TRUE;
#if UCONFIG_NO_BREAK_ITERATION
if(search->breaker) {
@@ -415,22 +416,27 @@
}
#endif
collator->setStrength(getECollationStrength(search->strength));
+ collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
strsrch = new StringSearch(pattern, text, (RuleBasedCollator *)collator,
breaker, status);
strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
if (U_FAILURE(status)) {
errln("Error opening string search %s", u_errorName(status));
- return FALSE;
+ result = FALSE;
+ goto bail;
}
if (!assertEqualWithStringSearch(strsrch, search)) {
- collator->setStrength(getECollationStrength(UCOL_TERTIARY));
- delete strsrch;
- return FALSE;
+ result = FALSE;
+ goto bail;
}
+
+bail:
collator->setStrength(getECollationStrength(UCOL_TERTIARY));
+ collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
delete strsrch;
- return TRUE;
+
+ return result;
}
UBool StringSearchTest::assertEqualWithAttribute(const SearchData *search,
@@ -681,7 +687,7 @@
while (BASIC[count].text != NULL) {
//printf("count %d", count);
if (!assertEqual(&BASIC[count])) {
- errln("Error at test number %d", count);
+ infoln("Error at test number %d", count);
}
count ++;
}
@@ -698,14 +704,14 @@
}
while (BASIC[count].text != NULL) {
if (!assertEqual(&BASIC[count])) {
- errln("Error at test number %d", count);
+ infoln("Error at test number %d", count);
}
count ++;
}
count = 0;
while (NORMEXACT[count].text != NULL) {
if (!assertEqual(&NORMEXACT[count])) {
- errln("Error at test number %d", count);
+ infoln("Error at test number %d", count);
}
count ++;
}
@@ -713,7 +719,7 @@
count = 0;
while (NONNORMEXACT[count].text != NULL) {
if (!assertEqual(&NONNORMEXACT[count])) {
- errln("Error at test number %d", count);
+ infoln("Error at test number %d", count);
}
count ++;
}
@@ -724,7 +730,7 @@
int count = 0;
while (STRENGTH[count].text != NULL) {
if (!assertEqual(&STRENGTH[count])) {
- errln("Error at test number %d", count);
+ infoln("Error at test number %d", count);
}
count ++;
}
@@ -810,7 +816,7 @@
}
strsrch->reset();
if (!assertEqualWithStringSearch(strsrch, search)) {
- errln("Error at test number %d", count);
+ infoln("Error at test number %d", count);
}
delete strsrch;
count += 2;
@@ -818,7 +824,7 @@
count = 0;
while (BREAKITERATOREXACT[count].text != NULL) {
if (!assertEqual(&BREAKITERATOREXACT[count])) {
- errln("Error at test number %d", count);
+ infoln("Error at test number %d", count);
}
count ++;
}
@@ -838,7 +844,7 @@
while (VARIABLE[count].text != NULL) {
logln("variable %d", count);
if (!assertEqual(&VARIABLE[count])) {
- errln("Error at test number %d", count);
+ infoln("Error at test number %d", count);
}
count ++;
}
@@ -1546,7 +1552,7 @@
delete collator;
}
-void StringSearchTest::TestDiactricMatch()
+void StringSearchTest::TestDiacriticMatch()
{
UChar temp[128];
UErrorCode status = U_ZERO_ERROR;
@@ -1559,7 +1565,7 @@
const SearchData *search;
- search = &(DIACTRICMATCH[count]);
+ search = &(DIACRITICMATCH[count]);
while (search->text != NULL) {
coll = getCollator(search->collator);
coll->setStrength(getECollationStrength(search->strength));
@@ -1577,7 +1583,7 @@
if (!assertEqualWithStringSearch(strsrch, search)) {
errln("Error at test number %d", count);
}
- search = &(DIACTRICMATCH[++count]);
+ search = &(DIACRITICMATCH[++count]);
delete strsrch;
}
@@ -1818,6 +1824,8 @@
if (tailored != NULL) {
delete tailored;
}
+
+ return;
}
strsrch->setCollator(m_en_us_, status);
@@ -1980,6 +1988,10 @@
UnicodeString pattern("pattern");
StringSearch *strsrch = new StringSearch(pattern, text, m_en_us_, NULL,
status);
+ Collator *collator = strsrch->getCollator();
+
+ collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
+
strsrch->setAttribute(USEARCH_CANONICAL_MATCH, USEARCH_ON, status);
/* testing out of bounds error */
strsrch->setOffset(-1, status);
@@ -2023,7 +2035,7 @@
errln("Error match found at %d %d",
strsrch->getMatchedStart(),
strsrch->getMatchedLength());
- return;
+ goto bail;
}
matchindex = search.offset[count + 1] == -1 ? -1 :
search.offset[count + 2];
@@ -2031,7 +2043,7 @@
strsrch->setOffset(search.offset[count + 1] + 1, status);
if (strsrch->getOffset() != search.offset[count + 1] + 1) {
errln("Error setting offset");
- return;
+ goto bail;
}
}
@@ -2045,9 +2057,12 @@
errln("Pattern: %s", str);
errln("Error match found at %d %d", strsrch->getMatchedStart(),
strsrch->getMatchedLength());
- return;
+ goto bail;
}
}
+
+bail:
+ collator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
delete strsrch;
}
Modified: trunk/source/test/intltest/srchtest.h
===================================================================
--- trunk/source/test/intltest/srchtest.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/intltest/srchtest.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
/****************************************************************************
* COPYRIGHT:
- * Copyright (c) 2001-2005, International Business Machines Corporation and others
+ * Copyright (c) 2001-2008, International Business Machines Corporation and others
* All Rights Reserved.
***************************************************************************/
@@ -88,7 +88,7 @@
void TestUClassID();
void TestSubclass();
void TestCoverage();
- void TestDiactricMatch();
+ void TestDiacriticMatch();
};
#endif /* #if !UCONFIG_NO_COLLATION */
Added: trunk/source/test/intltest/ssearch.cpp
===================================================================
--- trunk/source/test/intltest/ssearch.cpp (rev 0)
+++ trunk/source/test/intltest/ssearch.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -0,0 +1,1675 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2008, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/putil.h"
+#include "unicode/usearch.h"
+
+#include "cmemory.h"
+#include "unicode/coll.h"
+#include "unicode/tblcoll.h"
+#include "unicode/coleitr.h"
+#include "unicode/ucoleitr.h"
+
+#include "unicode/regex.h" // TODO: make conditional on regexp being built.
+
+#include "unicode/uniset.h"
+#include "unicode/uset.h"
+#include "unicode/ustring.h"
+#include "hash.h"
+#include "uhash.h"
+#include "ucol_imp.h"
+
+#include "intltest.h"
+#include "ssearch.h"
+
+#include "xmlparser.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+char testId[100];
+
+// Reports a failure (file, line, current testId) when x is false; execution continues.
+// Expands to a braced block, so it may be used with or without a trailing ';'.
+#define TEST_ASSERT(x) {if (!(x)) { \
+    errln("Failure in file %s, line %d, test ID = \"%s\"", __FILE__, __LINE__, testId);}}
+
+// Like TEST_ASSERT but prints message m and returns from the enclosing (void) function.
+#define TEST_ASSERT_M(x, m) {if (!(x)) { \
+    errln("Failure in file %s, line %d. \"%s\"", __FILE__, __LINE__, m);return;}}
+
+// Reports a failure, including the ICU error name, when errcode is a failure code.
+#define TEST_ASSERT_SUCCESS(errcode) {if (U_FAILURE(errcode)) { \
+    errln("Failure in file %s, line %d, test ID \"%s\", status = \"%s\"", \
+          __FILE__, __LINE__, testId, u_errorName(errcode));}}
+
+// Element count of a true array (not a pointer). The argument is parenthesized
+// so that expressions such as *p expand correctly.
+#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
+
+//---------------------------------------------------------------------------
+//
+// Test class boilerplate
+//
+//---------------------------------------------------------------------------
+// Constructor: no state to initialize.
+SSearchTest::SSearchTest()
+{
+}
+
+// Destructor: nothing to release.
+SSearchTest::~SSearchTest()
+{
+}
+
+// Test dispatcher: sets name for the test at the given index and, when exec
+// is true, runs it. An unknown index yields an empty name.
+void SSearchTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *params )
+{
+    if (exec) {
+        logln("TestSuite SSearchTest: ");
+    }
+
+    if (index == 0) {
+        name = "searchTest";
+        if (exec) {
+            searchTest();
+        }
+    } else if (index == 1) {
+        name = "offsetTest";
+        if (exec) {
+            offsetTest();
+        }
+    } else if (index == 2) {
+        name = "monkeyTest";
+        if (exec) {
+            monkeyTest(params);
+        }
+    } else {
+        name = "";  // needed to end loop
+    }
+}
+
+
+
+#define PATH_BUFFER_SIZE 2048
+/**
+ * Builds the path of a test data file by appending filename to the source
+ * test data directory.
+ *
+ * @param buffer   receives the path; must hold PATH_BUFFER_SIZE chars
+ * @param filename file name to append to the test data directory
+ * @return buffer on success; NULL (after reporting an error) if the directory
+ *         cannot be determined or the combined path does not fit
+ */
+const char *SSearchTest::getPath(char buffer[PATH_BUFFER_SIZE], const char *filename) {
+    UErrorCode status = U_ZERO_ERROR;
+    const char *testDataDirectory = IntlTest::getSourceTestData(status);
+
+    if (U_FAILURE(status) || testDataDirectory == NULL || filename == NULL) {
+        errln("ERROR: getPath() failed - %s", u_errorName(status));
+        return NULL;
+    }
+
+    // Checked separately so the message does not claim U_ZERO_ERROR.
+    if (strlen(testDataDirectory) + strlen(filename) + 1 >= PATH_BUFFER_SIZE) {
+        errln("ERROR: getPath() failed - path too long for buffer");
+        return NULL;
+    }
+
+    // Safe: the combined length was verified against PATH_BUFFER_SIZE above.
+    strcpy(buffer, testDataDirectory);
+    strcat(buffer, filename);
+    return buffer;
+}
+
+
+/**
+ * Data-driven string search test.
+ *
+ * Reads ssearch.xml; each <test-case> supplies a pattern, an optional target
+ * split into <pre>/<m>/<post> (where <m> is the expected match), plus optional
+ * strength, norm and locale attributes. For every case a forward search from
+ * offset 0 and a backwards search from the end of the target are checked
+ * against the expected match boundaries.
+ */
+void SSearchTest::searchTest()
+{
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+    UErrorCode status = U_ZERO_ERROR;
+    char path[PATH_BUFFER_SIZE];
+    const char *testFilePath = getPath(path, "ssearch.xml");
+
+    if (testFilePath == NULL) {
+        return; /* Couldn't get path: error message already output. */
+    }
+
+    UXMLParser *parser = UXMLParser::createParser(status);
+    TEST_ASSERT_SUCCESS(status);
+    if (U_FAILURE(status) || parser == NULL) {
+        delete parser;
+        return;
+    }
+    UXMLElement *root = parser->parseFile(testFilePath, status);
+    TEST_ASSERT_SUCCESS(status);
+    if (U_FAILURE(status) || root == NULL) {
+        if (U_SUCCESS(status)) {
+            errln("ssearch, could not parse test file");
+        }
+        delete root;
+        delete parser;
+        return;
+    }
+
+    const UnicodeString *debugTestCase = root->getAttribute("debug");
+    if (debugTestCase != NULL) {
+//       setenv("USEARCH_DEBUG", "1", 1);
+    }
+
+
+    const UXMLElement *testCase;
+    int32_t tc = 0;
+
+    while((testCase = root->nextChildElement(tc)) != NULL) {
+
+        if (testCase->getTagName().compare("test-case") != 0) {
+            errln("ssearch, unrecognized XML Element in test file");
+            continue;
+        }
+        const UnicodeString *id = testCase->getAttribute("id");
+        *testId = 0;
+        if (id != NULL) {
+            id->extract(0, id->length(), testId, sizeof(testId), US_INV);
+        }
+
+        // If debugging test case has been specified and this is not it, skip to next.
+        if (id!=NULL && debugTestCase!=NULL && *id != *debugTestCase) {
+            continue;
+        }
+        //
+        //  Get the requested collation strength.
+        //    Default is tertiary if the XML attribute is missing from the test case.
+        //    Tertiary is also used (after reporting an error) for a bogus value,
+        //    so the collator is never configured from an uninitialized variable.
+        //
+        const UnicodeString *strength = testCase->getAttribute("strength");
+        UColAttributeValue collatorStrength = UCOL_TERTIARY;
+        if (strength != NULL) {
+            if      (*strength=="PRIMARY")    { collatorStrength = UCOL_PRIMARY;}
+            else if (*strength=="SECONDARY")  { collatorStrength = UCOL_SECONDARY;}
+            else if (*strength=="TERTIARY")   { collatorStrength = UCOL_TERTIARY;}
+            else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;}
+            else if (*strength=="IDENTICAL")  { collatorStrength = UCOL_IDENTICAL;}
+            else {
+                // Bogus value supplied for strength.  Shouldn't happen, even from
+                //  typos, if the  XML source has been validated.
+                //  This assert is a little deceiving in that strength can be
+                //   any of the allowed values, not just TERTIARY, but it will
+                //   do the job of getting the error output.
+                TEST_ASSERT(*strength=="TERTIARY")
+            }
+        }
+
+        //
+        // Get the collator normalization flag.  Default is UCOL_OFF.
+        //
+        UColAttributeValue normalize = UCOL_OFF;
+        const UnicodeString *norm = testCase->getAttribute("norm");
+        TEST_ASSERT (norm==NULL || *norm=="ON" || *norm=="OFF");
+        if (norm!=NULL && *norm=="ON") {
+            normalize = UCOL_ON;
+        }
+
+        const UnicodeString defLocale("en");
+        char  clocale[100];
+        const UnicodeString *locale = testCase->getAttribute("locale");
+        if (locale == NULL || locale->length()==0) {
+            locale = &defLocale;
+        }
+        locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL);
+
+
+        UnicodeString  text;
+        UnicodeString  target;
+        UnicodeString  pattern;
+        int32_t        expectedMatchStart = -1;
+        int32_t        expectedMatchLimit = -1;
+        const UXMLElement  *n;
+        int            nodeCount = 0;
+
+        n = testCase->getChildElement("pattern");
+        TEST_ASSERT(n != NULL);
+        if (n==NULL) {
+            continue;
+        }
+        text = n->getText(FALSE);
+        text = text.unescape();
+        pattern.append(text);
+        nodeCount++;
+
+        n = testCase->getChildElement("pre");
+        if (n!=NULL) {
+            text = n->getText(FALSE);
+            text = text.unescape();
+            target.append(text);
+            nodeCount++;
+        }
+
+        n = testCase->getChildElement("m");
+        if (n!=NULL) {
+            expectedMatchStart = target.length();
+            text = n->getText(FALSE);
+            text = text.unescape();
+            target.append(text);
+            expectedMatchLimit = target.length();
+            nodeCount++;
+        }
+
+        n = testCase->getChildElement("post");
+        if (n!=NULL) {
+            text = n->getText(FALSE);
+            text = text.unescape();
+            target.append(text);
+            nodeCount++;
+        }
+
+        //  Check that there weren't extra things in the XML
+        TEST_ASSERT(nodeCount == testCase->countChildren());
+
+        // Open a collator and StringSearch based on the parameters
+        //   obtained from the XML.
+        //
+        status = U_ZERO_ERROR;
+        UCollator *collator = ucol_open(clocale, &status);
+        ucol_setStrength(collator, collatorStrength);
+        ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, normalize, &status);
+        UStringSearch *uss = usearch_openFromCollator(pattern.getBuffer(), pattern.length(),
+                                                      target.getBuffer(), target.length(),
+                                                      collator,
+                                                      NULL,     // the break iterator
+                                                      &status);
+
+        TEST_ASSERT_SUCCESS(status);
+        if (U_FAILURE(status)) {
+            usearch_close(uss);
+            ucol_close(collator);
+            continue;
+        }
+
+        int32_t foundStart = 0;
+        int32_t foundLimit = 0;
+        UBool   foundMatch;
+
+        //
+        // Do the search, check the match result against the expected results.
+        //
+        foundMatch= usearch_search(uss, 0, &foundStart, &foundLimit, &status);
+        TEST_ASSERT_SUCCESS(status);
+        if ((foundMatch && expectedMatchStart<0) ||
+            foundStart != expectedMatchStart     ||
+            foundLimit != expectedMatchLimit) {
+                TEST_ASSERT(FALSE);   //  output generic error position
+                infoln("Found, expected match start = %d, %d \n"
+                       "Found, expected match limit = %d, %d",
+                foundStart, expectedMatchStart, foundLimit, expectedMatchLimit);
+        }
+
+        // In case there are other matches...
+        // (should we only do this if the test case passed?)
+        // The last forward match becomes the expected result of the
+        // backwards search below.
+        while (foundMatch) {
+            expectedMatchStart = foundStart;
+            expectedMatchLimit = foundLimit;
+
+            foundMatch = usearch_search(uss, foundLimit, &foundStart, &foundLimit, &status);
+        }
+
+        // Re-open the search for the backwards pass. The new handle must be
+        // stored in uss: the old one is closed and may not be used again.
+        usearch_close(uss);
+        uss = usearch_openFromCollator(pattern.getBuffer(), pattern.length(),
+                                       target.getBuffer(), target.length(),
+                                       collator,
+                                       NULL,
+                                       &status);
+        TEST_ASSERT_SUCCESS(status);
+        if (U_FAILURE(status)) {
+            usearch_close(uss);
+            ucol_close(collator);
+            continue;
+        }
+
+        //
+        // Do the backwards search, check the match result against the expected results.
+        //
+        foundMatch= usearch_searchBackwards(uss, target.length(), &foundStart, &foundLimit, &status);
+        TEST_ASSERT_SUCCESS(status);
+        if ((foundMatch && expectedMatchStart<0) ||
+            foundStart != expectedMatchStart     ||
+            foundLimit != expectedMatchLimit) {
+                TEST_ASSERT(FALSE);   //  output generic error position
+                infoln("Found, expected backwards match start = %d, %d \n"
+                       "Found, expected backwards match limit = %d, %d",
+                foundStart, expectedMatchStart, foundLimit, expectedMatchLimit);
+        }
+
+        usearch_close(uss);
+        ucol_close(collator);
+    }
+
+    delete root;
+    delete parser;
+#endif
+}
+
+// One collation element together with the iterator offsets recorded around it.
+struct Order
+{
+ // the collation element value (OrderList's collator constructor masks it by strength)
+ int32_t order;
+ // iterator offset at the low (start) side of this element
+ int32_t lowOffset;
+ // iterator offset at the high (end) side of this element
+ int32_t highOffset;
+};
+
+// Growable array of Order entries.
+// NOTE(review): the class owns a raw array but declares no copy constructor or
+// assignment operator; within the code visible here it is only passed by reference.
+class OrderList
+{
+public:
+ // Creates an empty list.
+ OrderList();
+ // Fills the list from the collation elements of 'string', starting at stringOffset.
+ OrderList(UCollator *coll, const UnicodeString &string, int32_t stringOffset = 0);
+ ~OrderList();
+
+ // Number of entries currently stored.
+ int32_t size(void) const;
+ // Appends an entry, growing the storage when full.
+ void add(int32_t order, int32_t low, int32_t high);
+ // Entry at index, or NULL when index is past the end.
+ const Order *get(int32_t index) const;
+ // Field accessors; return -1 (offsets) or UCOL_NULLORDER (order) when get() returns NULL.
+ int32_t getLowOffset(int32_t index) const;
+ int32_t getHighOffset(int32_t index) const;
+ int32_t getOrder(int32_t index) const;
+ // Reverses the entries in place.
+ void reverse(void);
+ // TRUE if both lists have the same size and identical entries.
+ UBool compare(const OrderList &other) const;
+ // TRUE if other's orders (excluding its last entry) occur in this list starting at offset.
+ UBool matchesAt(int32_t offset, const OrderList &other) const;
+
+private:
+ Order *list;       // heap array of listMax entries
+ int32_t listMax;   // allocated capacity
+ int32_t listSize;  // entries in use
+};
+
+// Creates an empty list with room for 16 entries; add() grows it on demand.
+// Initializers are listed in member declaration order.
+OrderList::OrderList()
+  : list(NULL), listMax(16), listSize(0)
+{
+    list = new Order[listMax];
+}
+
+/**
+ * Builds the list from the collation elements of 'string', iterating forward
+ * from stringOffset.
+ *
+ * Each element is masked according to the collator's strength (primary only,
+ * primary+secondary, or all three levels); elements that are ignorable after
+ * masking are skipped. The terminating UCOL_NULLORDER is stored as the last
+ * entry. If the element iterator cannot be opened the list holds only that
+ * terminator.
+ */
+OrderList::OrderList(UCollator *coll, const UnicodeString &string, int32_t stringOffset)
+  : list(NULL), listMax(16), listSize(0)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status);
+    uint32_t strengthMask = 0;
+    int32_t order, low, high;
+
+    // Allocate first so add() and the destructor work on every path.
+    list = new Order[listMax];
+
+    if (U_FAILURE(status) || elems == NULL) {
+        // No iterator to read from: keep the "ends with NULLORDER" invariant.
+        add(UCOL_NULLORDER, stringOffset, stringOffset);
+        if (elems != NULL) {
+            ucol_closeElements(elems);
+        }
+        return;
+    }
+
+    switch (ucol_getStrength(coll))
+    {
+    default:
+        strengthMask |= UCOL_TERTIARYORDERMASK;
+        /* fall through */
+
+    case UCOL_SECONDARY:
+        strengthMask |= UCOL_SECONDARYORDERMASK;
+        /* fall through */
+
+    case UCOL_PRIMARY:
+        strengthMask |= UCOL_PRIMARYORDERMASK;
+    }
+
+    ucol_setOffset(elems, stringOffset, &status);
+
+    do {
+        low   = ucol_getOffset(elems);
+        order = ucol_next(elems, &status);
+        high  = ucol_getOffset(elems);
+
+        // The end marker is stored unmasked so it stays recognizable.
+        if (order != UCOL_NULLORDER) {
+            order &= strengthMask;
+        }
+
+        if (order != UCOL_IGNORABLE) {
+            add(order, low, high);
+        }
+    } while (order != UCOL_NULLORDER);
+
+    ucol_closeElements(elems);
+}
+
+// Releases the entry array allocated by the constructors / add().
+OrderList::~OrderList()
+{
+ delete[] list;
+}
+
+// Appends one entry; when the array is full its capacity is doubled first.
+void OrderList::add(int32_t order, int32_t low, int32_t high)
+{
+    if (listSize >= listMax) {
+        listMax *= 2;
+
+        Order *grown = new Order[listMax];
+
+        uprv_memcpy(grown, list, listSize * sizeof(Order));
+        delete[] list;
+        list = grown;
+    }
+
+    Order &slot = list[listSize];
+
+    slot.order      = order;
+    slot.lowOffset  = low;
+    slot.highOffset = high;
+
+    ++listSize;
+}
+
+// Returns the entry at index, or NULL when index is outside [0, size()).
+// Negative indexes are rejected too, matching CEList::get().
+const Order *OrderList::get(int32_t index) const
+{
+    if (index < 0 || index >= listSize) {
+        return NULL;
+    }
+
+    return &list[index];
+}
+
+// Low offset of the entry at index, or -1 if get() has no such entry.
+int32_t OrderList::getLowOffset(int32_t index) const
+{
+    const Order *entry = get(index);
+
+    return (entry == NULL) ? -1 : entry->lowOffset;
+}
+
+// High offset of the entry at index, or -1 if get() has no such entry.
+int32_t OrderList::getHighOffset(int32_t index) const
+{
+    const Order *entry = get(index);
+
+    return (entry == NULL) ? -1 : entry->highOffset;
+}
+
+// Order value of the entry at index, or UCOL_NULLORDER if get() has no such entry.
+int32_t OrderList::getOrder(int32_t index) const
+{
+    const Order *entry = get(index);
+
+    if (entry == NULL) {
+        return UCOL_NULLORDER;
+    }
+
+    return entry->order;
+}
+
+// Number of entries stored (the collator constructor's trailing NULLORDER entry is counted).
+int32_t OrderList::size() const
+{
+ return listSize;
+}
+
+// Reverses the entries in place by swapping from both ends toward the middle.
+void OrderList::reverse()
+{
+    int32_t front = 0;
+    int32_t back  = listSize - 1;
+
+    while (front < back) {
+        Order tmp = list[back];
+
+        list[back]  = list[front];
+        list[front] = tmp;
+
+        ++front;
+        --back;
+    }
+}
+
+// TRUE when both lists have the same length and every entry matches in
+// order value, low offset and high offset.
+UBool OrderList::compare(const OrderList &other) const
+{
+    if (listSize != other.listSize) {
+        return FALSE;
+    }
+
+    for (int32_t i = 0; i < listSize; ++i) {
+        const Order &mine   = list[i];
+        const Order &theirs = other.list[i];
+
+        UBool same = mine.order      == theirs.order &&
+                     mine.lowOffset  == theirs.lowOffset &&
+                     mine.highOffset == theirs.highOffset;
+
+        if (!same) {
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+// TRUE if the order values of 'other' (without its trailing NULLORDER entry)
+// appear in this list starting at 'offset'. Only order values are compared,
+// not offsets. A negative offset never matches.
+UBool OrderList::matchesAt(int32_t offset, const OrderList &other) const
+{
+    // NOTE: sizes include the NULLORDER, which we don't want to compare.
+    int32_t otherSize = other.size() - 1;
+
+    if (offset < 0 || listSize - 1 - offset < otherSize) {
+        return FALSE;
+    }
+
+    for (int32_t i = offset, j = 0; j < otherSize; i += 1, j += 1) {
+        if (getOrder(i) != other.getOrder(j)) {
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+// Formats the (low, high) offset pairs of 'list' into 'buffer' as
+// "(l, h), (l, h), ..." and returns buffer. An empty list produces an empty
+// string. No length is passed in, so the caller must supply a buffer large
+// enough for the whole list.
+static char *printOffsets(char *buffer, OrderList &list)
+{
+    int32_t size = list.size();
+    char *s = buffer;
+
+    *s = 0;   // keep the result terminated even when nothing is written
+
+    for(int32_t i = 0; i < size; i += 1) {
+        const Order *order = list.get(i);
+
+        if (i != 0) {
+            s += sprintf(s, ", ");
+        }
+
+        s += sprintf(s, "(%d, %d)", order->lowOffset, order->highOffset);
+    }
+
+    return buffer;
+}
+
+// Formats the order values of 'list' into 'buffer' as comma-separated
+// 8-digit hex numbers and returns buffer. An empty list produces an empty
+// string. No length is passed in, so the caller must supply a buffer large
+// enough for the whole list.
+static char *printOrders(char *buffer, OrderList &list)
+{
+    int32_t size = list.size();
+    char *s = buffer;
+
+    *s = 0;   // keep the result terminated even when nothing is written
+
+    for(int32_t i = 0; i < size; i += 1) {
+        const Order *order = list.get(i);
+
+        if (i != 0) {
+            s += sprintf(s, ", ");
+        }
+
+        // %X expects an unsigned argument.
+        s += sprintf(s, "%8.8X", (unsigned int) order->order);
+    }
+
+    return buffer;
+}
+
+/**
+ * Checks that iterating a string's collation elements forward and backward
+ * yields the same elements with the same offsets.
+ *
+ * For each test string the forward pass records (order, low, high) for every
+ * element including the final NULLORDER; the backward pass starts from the
+ * end of the string, records the same triples and is then reversed. The two
+ * lists must compare equal. Uses an English collator with normalization on.
+ */
+void SSearchTest::offsetTest()
+{
+    UnicodeString test[] = {
+        "\\ua191\\u16ef\\u2036\\u017a",
+
+#if 0
+        // This results in a complex interaction between contraction,
+        // expansion and normalization that confuses the backwards offset fixups.
+        "\\u0F7F\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85",
+#endif
+
+        "\\u0F80\\u0F81\\u0F82\\u0F83\\u0F84\\u0F85",
+        "\\u07E9\\u07EA\\u07F1\\u07F2\\u07F3",
+
+        "\\u02FE\\u02FF"
+        "\\u0300\\u0301\\u0302\\u0303\\u0304\\u0305\\u0306\\u0307\\u0308\\u0309\\u030A\\u030B\\u030C\\u030D\\u030E\\u030F"
+        "\\u0310\\u0311\\u0312\\u0313\\u0314\\u0315\\u0316\\u0317\\u0318\\u0319\\u031A\\u031B\\u031C\\u031D\\u031E\\u031F"
+        "\\u0320\\u0321\\u0322\\u0323\\u0324\\u0325\\u0326\\u0327\\u0328\\u0329\\u032A\\u032B\\u032C\\u032D\\u032E\\u032F"
+        "\\u0330\\u0331\\u0332\\u0333\\u0334\\u0335\\u0336\\u0337\\u0338\\u0339\\u033A\\u033B\\u033C\\u033D\\u033E\\u033F"
+        "\\u0340\\u0341\\u0342\\u0343\\u0344\\u0345\\u0346\\u0347\\u0348\\u0349\\u034A\\u034B\\u034C\\u034D\\u034E",
+
+        "\\u02FE\\u02FF\\u0300\\u0301\\u0302\\u0303\\u0316\\u0317\\u0318",
+        "abc\\u0E41\\u0301\\u0316",
+        "abc\\u0E41\\u0316\\u0301",
+        "\\u0E41\\u0301\\u0316",
+        "\\u0E41\\u0316\\u0301",
+        "a\\u0301\\u0316",
+        "a\\u0316\\u0301",
+        "\\uAC52\\uAC53",
+        "\\u34CA\\u34CB",
+        "\\u11ED\\u11EE",
+        "\\u30C3\\u30D0",
+        "p\\u00E9ch\\u00E9",
+        "a\\u0301\\u0325",
+        "a\\u0300\\u0325",
+        "a\\u0325\\u0300",
+        "A\\u0323\\u0300B",
+        "A\\u0300\\u0323B",
+        "A\\u0301\\u0323B",
+        "A\\u0302\\u0301\\u0323B",
+        "abc",
+        "ab\\u0300c",
+        "ab\\u0300\\u0323c",
+        " \\uD800\\uDC00\\uDC00",
+        "a\\uD800\\uDC00\\uDC00",
+        "A\\u0301\\u0301",
+        "A\\u0301\\u0323",
+        "A\\u0301\\u0323B",
+        "B\\u0301\\u0323C",
+        "A\\u0300\\u0323B",
+        "\\u0301A\\u0301\\u0301",
+        "abcd\\r\\u0301",
+        "p\\u00EAche",
+        "pe\\u0302che",
+    };
+
+    int32_t testCount = ARRAY_SIZE(test);
+    UErrorCode status = U_ZERO_ERROR;
+    RuleBasedCollator *col = (RuleBasedCollator *) Collator::createInstance(Locale::getEnglish(), status);
+    char buffer[4096];  // A bit of a hack... just happens to be long enough for all the test cases...
+                        // We could allocate one that's the right size by (CE_count * 10) + 2
+                        // 10 chars is enough room for 8 hex digits plus ", ". 2 extra chars for "[" and "]"
+
+    if (U_FAILURE(status) || col == NULL) {
+        errln("ERROR: offsetTest() could not create collator - %s", u_errorName(status));
+        delete col;
+        return;
+    }
+
+    col->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
+
+    for(int32_t i = 0; i < testCount; i += 1) {
+        UnicodeString ts = test[i].unescape();
+        CollationElementIterator *iter = col->createCollationElementIterator(ts);
+        OrderList forwardList;
+        OrderList backwardList;
+        int32_t order, low, high;
+
+        if (iter == NULL) {
+            errln("ERROR: offsetTest() could not create collation element iterator");
+            continue;
+        }
+
+        // Forward pass: the terminating NULLORDER is recorded as well.
+        do {
+            low   = iter->getOffset();
+            order = iter->next(status);
+            high  = iter->getOffset();
+
+            forwardList.add(order, low, high);
+        } while (order != CollationElementIterator::NULLORDER);
+
+        iter->reset();
+        iter->setOffset(ts.length(), status);
+
+        // Seed with the end marker so that, once reversed, the backward list
+        // ends the same way the forward list does.
+        backwardList.add(CollationElementIterator::NULLORDER, iter->getOffset(), iter->getOffset());
+
+        do {
+            high  = iter->getOffset();
+            order = iter->previous(status);
+            low   = iter->getOffset();
+
+            if (order == CollationElementIterator::NULLORDER) {
+                break;
+            }
+
+            backwardList.add(order, low, high);
+        } while (TRUE);
+
+        backwardList.reverse();
+
+        if (forwardList.compare(backwardList)) {
+            logln("Works with \"%S\"", test[i].getTerminatedBuffer());
+            logln("Forward offsets:  [%s]", printOffsets(buffer, forwardList));
+//          logln("Backward offsets: [%s]", printOffsets(buffer, backwardList));
+
+            logln("Forward CEs:  [%s]", printOrders(buffer, forwardList));
+//          logln("Backward CEs: [%s]", printOrders(buffer, backwardList));
+
+            logln();
+        } else {
+            errln("Fails with \"%S\"", test[i].getTerminatedBuffer());
+            infoln("Forward offsets:  [%s]", printOffsets(buffer, forwardList));
+            infoln("Backward offsets: [%s]", printOffsets(buffer, backwardList));
+
+            infoln("Forward CEs:  [%s]", printOrders(buffer, forwardList));
+            infoln("Backward CEs: [%s]", printOrders(buffer, backwardList));
+
+            infoln();
+        }
+
+        // The iterator is created by this loop iteration and must be released here.
+        delete iter;
+    }
+
+    delete col;
+}
+
+// Growable list of the collation elements (CEs) produced for one string.
+// The constructor fills the list; add() is private, so the contents cannot
+// be changed from outside the class afterwards.
+class CEList
+{
+public:
+ CEList(UCollator *coll, const UnicodeString &string);
+ ~CEList();
+
+ // Number of CEs stored.
+ int32_t size() const;
+ // CE at index, or -1 when index is out of range.
+ int32_t get(int32_t index) const;
+ // TRUE when every CE of other appears in this list, in order, starting at offset.
+ UBool matchesAt(int32_t offset, const CEList *other) const;
+
+private:
+ void add(int32_t ce);
+
+ int32_t *ces; // heap array, released in the destructor
+ int32_t listMax; // allocated capacity of ces
+ int32_t listSize; // number of entries in use
+};
+
+// Collects the collation elements that coll produces for string.
+// Each CE is masked with UCOL_PRIMARYORDERMASK; CEs that equal
+// UCOL_IGNORABLE after masking are left out of the list.
+CEList::CEList(UCollator *coll, const UnicodeString &string)
+    : ces(NULL), listMax(8), listSize(0)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status);
+    uint32_t strengthMask = 0;
+
+    // The strength-dependent mask is compiled out; only the primary
+    // weight of every CE is kept.
+#if 0
+    switch (ucol_getStrength(coll))
+    {
+    default:
+        strengthMask |= UCOL_TERTIARYORDERMASK;
+        /* fall through */
+
+    case UCOL_SECONDARY:
+        strengthMask |= UCOL_SECONDARYORDERMASK;
+        /* fall through */
+
+    case UCOL_PRIMARY:
+        strengthMask |= UCOL_PRIMARYORDERMASK;
+    }
+#else
+    strengthMask = UCOL_PRIMARYORDERMASK;
+#endif
+
+    ces = new int32_t[listMax];
+
+    for (;;) {
+        int32_t order = ucol_next(elems, &status);
+
+        if (order == UCOL_NULLORDER) {
+            break;
+        }
+
+        order &= strengthMask;
+
+        if (order != UCOL_IGNORABLE) {
+            add(order);
+        }
+    }
+
+    ucol_closeElements(elems);
+}
+
+// Releases the CE array allocated by the constructor or by add().
+CEList::~CEList()
+{
+ delete[] ces;
+}
+
+// Appends one CE, doubling the backing array first when it is full.
+void CEList::add(int32_t ce)
+{
+    if (listSize >= listMax) {
+        const int32_t grownMax = listMax * 2;
+        int32_t *grown = new int32_t[grownMax];
+
+        uprv_memcpy(grown, ces, listSize * sizeof(int32_t));
+        delete[] ces;
+
+        ces = grown;
+        listMax = grownMax;
+    }
+
+    ces[listSize] = ce;
+    listSize += 1;
+}
+
+// Returns the CE stored at index, or -1 when index is outside [0, size()).
+int32_t CEList::get(int32_t index) const
+{
+    const UBool outOfRange = (index < 0 || index >= listSize);
+
+    return outOfRange ? -1 : ces[index];
+}
+
+// Returns TRUE when the CEs of other occur in this list, in order,
+// beginning at offset. Returns FALSE when fewer than other->size()
+// CEs remain after offset.
+UBool CEList::matchesAt(int32_t offset, const CEList *other) const
+{
+    const int32_t otherSize = other->size();
+
+    if (listSize - offset < otherSize) {
+        return FALSE;
+    }
+
+    for (int32_t j = 0; j < otherSize; j += 1) {
+        if (ces[offset + j] != other->get(j)) {
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+// Number of CEs currently stored in the list.
+int32_t CEList::size() const
+{
+ return listSize;
+}
+
+// Growable array of UnicodeString values. Strings passed to add() are
+// copied into the list; get() hands back pointers into the list's own storage.
+class StringList
+{
+public:
+ StringList();
+ ~StringList();
+
+ // Appends a copy of *string.
+ void add(const UnicodeString *string);
+ // Appends a string built from count UChars starting at chars.
+ void add(const UChar *chars, int32_t count);
+ // String at index, or NULL when index is out of range.
+ const UnicodeString *get(int32_t index) const;
+ // Number of strings stored.
+ int32_t size() const;
+
+private:
+ UnicodeString *strings; // heap array, released in the destructor
+ int32_t listMax; // allocated capacity of strings
+ int32_t listSize; // number of entries in use
+};
+
+// Starts with room for 16 default-constructed (empty) strings.
+StringList::StringList()
+ : strings(NULL), listMax(16), listSize(0)
+{
+ strings = new UnicodeString [listMax];
+}
+
+// Destroys every string in the array and releases the array itself.
+StringList::~StringList()
+{
+ delete[] strings;
+}
+
+// Appends a copy of *string, doubling the backing array first when it is full.
+void StringList::add(const UnicodeString *string)
+{
+    if (listSize >= listMax) {
+        const int32_t newMax = listMax * 2;
+        UnicodeString *newStrings = new UnicodeString[newMax];
+
+        // UnicodeString manages its own storage, so the elements have to be
+        // copied by assignment. A raw memory copy followed by delete[] of
+        // the old array would leave the new elements referring to buffers
+        // that the old elements' destructors have already released.
+        for (int32_t i = 0; i < listSize; i += 1) {
+            newStrings[i] = strings[i];
+        }
+
+        delete[] strings;
+        strings = newStrings;
+        listMax = newMax;
+    }
+
+    // Slots at and beyond listSize are still default-constructed (empty),
+    // so appending is the same as copying the source string.
+    strings[listSize++].append(*string);
+}
+
+// Convenience overload: wraps count UChars from chars in a temporary
+// UnicodeString and appends a copy of it.
+void StringList::add(const UChar *chars, int32_t count)
+{
+    const UnicodeString wrapped(chars, count);
+    const UnicodeString *source = &wrapped;
+
+    add(source);
+}
+
+// Returns a pointer to the string stored at index, or NULL when index
+// is outside [0, size()).
+const UnicodeString *StringList::get(int32_t index) const
+{
+    if (index < 0 || index >= listSize) {
+        return NULL;
+    }
+
+    return strings + index;
+}
+
+// Number of strings currently stored in the list.
+int32_t StringList::size() const
+{
+ return listSize;
+}
+
+// Hash map from a collation element to the StringList registered for it.
+// The map owns its StringList values and deletes them through
+// deleteStringList().
+class CEToStringsMap
+{
+public:
+    CEToStringsMap();
+    ~CEToStringsMap();
+
+    // Adds a copy of *string to the list stored for ce, creating the
+    // list on first use.
+    void put(int32_t ce, UnicodeString *string);
+
+    // Returns the list stored for ce; put() treats a NULL result as
+    // "no list stored yet".
+    StringList *getStringList(int32_t ce) const;
+
+private:
+    // Value deleter handed to the hash table.
+    static void deleteStringList(void *obj);
+
+    void putStringList(int32_t ce, StringList *stringList);
+
+    UHashtable *map;
+};
+
+CEToStringsMap::CEToStringsMap()
+{
+    UErrorCode status = U_ZERO_ERROR;
+
+    map = uhash_open(uhash_hashLong,
+                     uhash_compareLong,
+                     uhash_compareCaselessUnicodeString,
+                     &status);
+
+    uhash_setValueDeleter(map, deleteStringList);
+}
+
+CEToStringsMap::~CEToStringsMap()
+{
+    uhash_close(map);
+}
+
+void CEToStringsMap::put(int32_t ce, UnicodeString *string)
+{
+    StringList *list = getStringList(ce);
+
+    if (list == NULL) {
+        // First string for this CE: create its list and register it.
+        list = new StringList();
+        putStringList(ce, list);
+    }
+
+    list->add(string);
+}
+
+StringList *CEToStringsMap::getStringList(int32_t ce) const
+{
+    void *value = uhash_iget(map, ce);
+
+    return (StringList *) value;
+}
+
+void CEToStringsMap::putStringList(int32_t ce, StringList *stringList)
+{
+    UErrorCode status = U_ZERO_ERROR;
+
+    uhash_iput(map, ce, (void *) stringList, &status);
+}
+
+void CEToStringsMap::deleteStringList(void *obj)
+{
+    delete (StringList *) obj;
+}
+
+// Hash map from a string to the CEList computed for it.
+// The map takes ownership of BOTH the key string and the CEList value
+// passed to put(): each is deleted when its entry is replaced or when the
+// map is closed. Callers must therefore pass heap-allocated objects and
+// must not delete them afterwards.
+class StringToCEsMap
+{
+public:
+    StringToCEsMap();
+    ~StringToCEsMap();
+
+    // Stores ces under string; the map adopts both pointers.
+    void put(const UnicodeString *string, const CEList *ces);
+
+    // Returns the CEList stored for string (keys are compared caselessly).
+    const CEList *get(const UnicodeString *string);
+
+private:
+
+    static void deleteCEList(void *obj);
+    static void deleteStringKey(void *obj);
+
+    UHashtable *map;
+};
+
+StringToCEsMap::StringToCEsMap()
+{
+    UErrorCode status = U_ZERO_ERROR;
+
+    // NOTE(review): keys are hashed and compared caselessly, so strings that
+    // differ only by case folding share one entry - confirm this is intended.
+    map = uhash_open(uhash_hashCaselessUnicodeString,
+                     uhash_compareCaselessUnicodeString,
+                     uhash_compareLong,
+                     &status);
+
+    uhash_setValueDeleter(map, deleteCEList);
+
+    // Without a key deleter the heap-allocated key strings handed to put()
+    // were never released.
+    uhash_setKeyDeleter(map, deleteStringKey);
+}
+
+StringToCEsMap::~StringToCEsMap()
+{
+    uhash_close(map);
+}
+
+void StringToCEsMap::put(const UnicodeString *string, const CEList *ces)
+{
+    UErrorCode status = U_ZERO_ERROR;
+
+    uhash_put(map, (void *) string, (void *) ces, &status);
+}
+
+const CEList *StringToCEsMap::get(const UnicodeString *string)
+{
+    return (const CEList *) uhash_get(map, string);
+}
+
+void StringToCEsMap::deleteCEList(void *obj)
+{
+    CEList *list = (CEList *) obj;
+
+    delete list;
+}
+
+void StringToCEsMap::deleteStringKey(void *obj)
+{
+    UnicodeString *key = (UnicodeString *) obj;
+
+    delete key;
+}
+
+// Fills both lookup tables from the items of charsToTest.
+// For every code point of a range item, and for every string item, the
+// string's CEList is computed; the string is stored in charsToCEList as
+// the key for that CEList, and is also registered in ceToCharsStartingWith
+// under the first CE of the list (get(0), i.e. -1 for an empty list).
+static void buildData(UCollator *coll, USet *charsToTest, StringToCEsMap *charsToCEList, CEToStringsMap *ceToCharsStartingWith)
+{
+    const int32_t itemCount = uset_getItemCount(charsToTest);
+    UErrorCode status = U_ZERO_ERROR;
+
+    for (int32_t item = 0; item < itemCount; item += 1) {
+        UChar32 first = 0, last = 0;
+        UChar chars[16];
+        const int32_t len = uset_getItem(charsToTest, item, &first, &last,
+                                         chars, 16, &status);
+
+        if (len < 0) {
+            // shouldn't happen...
+            continue;
+        }
+
+        if (len > 0) {
+            // The item is a string.
+            UnicodeString *st = new UnicodeString(chars, len);
+            CEList *ceList = new CEList(coll, *st);
+
+            charsToCEList->put(st, ceList);
+            ceToCharsStartingWith->put(ceList->get(0), st);
+            continue;
+        }
+
+        // len == 0: the item is the code point range first..last.
+        for (UChar32 ch = first; ch <= last; ch += 1) {
+            UnicodeString *st = new UnicodeString(ch);
+            CEList *ceList = new CEList(coll, *st);
+
+            charsToCEList->put(st, ceList);
+            ceToCharsStartingWith->put(ceList->get(0), st);
+        }
+    }
+}
+
+// Appends a printable rendering of string to buffer and returns buffer.
+// Code points in U+0020..U+007F are appended as they are, except that a
+// backslash is doubled; everything else becomes \uXXXX (up to U+FFFF)
+// or \UXXXXXXXX (above U+FFFF).
+static UnicodeString &escape(const UnicodeString &string, UnicodeString &buffer)
+{
+    int32_t pos = 0;
+
+    while (pos < string.length()) {
+        const UChar32 ch = string.char32At(pos);
+
+        if (ch < 0x0020 || ch > 0x007F) {
+            char cbuffer[12];
+
+            if (ch > 0xFFFFL) {
+                sprintf(cbuffer, "\\U%8.8X", ch);
+            } else {
+                sprintf(cbuffer, "\\u%4.4X", ch);
+            }
+
+            buffer.append(cbuffer);
+        } else if (ch == 0x005C) {
+            buffer.append("\\\\");
+        } else {
+            buffer.append(ch);
+        }
+
+        // A supplementary code point occupies two UChars.
+        pos += (ch >= 0x10000L) ? 2 : 1;
+    }
+
+    return buffer;
+}
+
+// Estimates the length, in UChars, of the shortest string that produces
+// the CEs of ceList from offset onwards.
+//
+// At each position the candidate strings registered for the current CE are
+// tried, and the one with the largest (CE count - string length) that
+// matches at the position is taken. The chosen strings are appended to
+// debug in escaped form, each followed by "/", and debug is finished with
+// a U+0000. This is a greedy approximation, not a true minimum (it needs
+// to be refined to use dynamic programming).
+//
+// If some CE has no registered string, or no candidate matches, the walk
+// stops there and the length accumulated so far is returned.
+static int32_t minLengthInChars(const CEList *ceList, int32_t offset, StringToCEsMap *charsToCEList, CEToStringsMap *ceToCharsStartingWith,
+                                UnicodeString &debug)
+{
+    int32_t totalStringLength = 0;
+
+    while (offset < ceList->size()) {
+        int32_t ce = ceList->get(offset);
+        int32_t bestLength = INT32_MIN;
+        const UnicodeString *bestString = NULL;
+        int32_t bestCeLength = 0;
+        const StringList *strings = ceToCharsStartingWith->getStringList(ce);
+
+        if (strings == NULL) {
+            // Nothing is registered for this CE, so it cannot be covered.
+            break;
+        }
+
+        int32_t stringCount = strings->size();
+
+        for (int32_t s = 0; s < stringCount; s += 1) {
+            const UnicodeString *string = strings->get(s);
+            const CEList *ceList2 = charsToCEList->get(string);
+
+            if (ceList2 != NULL && ceList->matchesAt(offset, ceList2)) {
+                int32_t length = ceList2->size() - string->length();
+
+                if (bestLength < length) {
+                    bestLength = length;
+                    bestCeLength = ceList2->size();
+                    bestString = string;
+                }
+            }
+        }
+
+        if (bestString == NULL || bestCeLength <= 0) {
+            // No candidate matched (or the match consumes no CEs);
+            // continuing would dereference NULL or never advance offset.
+            break;
+        }
+
+        totalStringLength += bestString->length();
+        escape(*bestString, debug).append("/");
+        offset += bestCeLength;
+    }
+
+    debug.append((UChar)0x0000);
+    return totalStringLength;
+}
+
+// Runs minLengthInChars() over a few fixed example strings, once for
+// every index below the example's length. The results are only consumed
+// by the commented-out infoln() calls, so nothing is reported.
+static void minLengthTest(UCollator *coll, StringToCEsMap *charsToCEList, CEToStringsMap *ceToCharsStartingWith)
+{
+    UnicodeString examples[] = {"fuss", "fiss", "affliss", "VII"};
+    UnicodeString debug;
+    const int32_t exampleCount = sizeof(examples) / sizeof(examples[0]);
+
+    for (int32_t e = 0; e < exampleCount; e += 1) {
+        const UnicodeString &example = examples[e];
+        CEList *exampleCEs = new CEList(coll, example);
+        int32_t start = 0;
+
+        //infoln("%S:", examples[s].getTerminatedBuffer());
+
+        while (start < example.length()) {
+            debug.remove();
+
+            int32_t minLength = minLengthInChars(exampleCEs, start, charsToCEList, ceToCharsStartingWith, debug);
+            //infoln("\t%d\t%S", minLength, debug.getTerminatedBuffer());
+
+            start += 1;
+        }
+
+        //infoln();
+        delete exampleCEs;
+    }
+}
+
+//----------------------------------------------------------------------------------------
+//
+// Random Numbers. Similar to standard lib rand() and srand()
+// Not using library to
+// 1. Get same results on all platforms.
+// 2. Get access to current seed, to more easily reproduce failures.
+//
+//---------------------------------------------------------------------------------------
+static uint32_t m_seed = 1;
+
+// Advances the generator state in m_seed (multiplier 1103515245,
+// increment 12345, modulo 2^32 through uint32_t wrap-around) and returns
+// bits 16..30 of the new state, i.e. a value in 0..32767.
+static uint32_t m_rand()
+{
+    const uint32_t next = m_seed * 1103515245 + 12345;
+
+    m_seed = next;
+    return (next >> 16) & 0x7FFF;
+}
+
+// Abstract source of random test-string pieces. Each call to append()
+// adds one piece to test and a corresponding piece to alternate.
+// Construction and destruction are protected, so only subclasses can be
+// instantiated.
+class Monkey
+{
+public:
+ virtual void append(UnicodeString &test, UnicodeString &alternate) = 0;
+
+protected:
+ Monkey();
+ virtual ~Monkey();
+};
+
+Monkey::Monkey()
+{
+ // ook?
+}
+
+Monkey::~Monkey()
+{
+ // ook?
+}
+
+// Monkey that appends one randomly chosen code point of a USet.
+// The set is only referenced, not owned: the destructor does not close it.
+class SetMonkey : public Monkey
+{
+public:
+    SetMonkey(const USet *theSet);
+    ~SetMonkey();
+
+    virtual void append(UnicodeString &test, UnicodeString &alternate);
+
+private:
+    const USet *set;
+};
+
+SetMonkey::SetMonkey(const USet *theSet)
+    : Monkey(), set(theSet)
+{
+    // ook?
+}
+
+SetMonkey::~SetMonkey()
+{
+    //ook...
+}
+
+// Picks a code point of the set with m_rand() and appends it, unchanged,
+// to both test and alternate.
+void SetMonkey::append(UnicodeString &test, UnicodeString &alternate)
+{
+    const int32_t setSize = uset_size(set);
+    const int32_t pick = m_rand() % setSize;
+    const UnicodeString piece(uset_charAt(set, pick));
+
+    test.append(piece);
+    alternate.append(piece);  // flip case, or some junk?
+}
+
+// Monkey that appends a random item (a code point from a range, or a
+// string) of a USet to the test case, and appends to the alternate a
+// string produced by generateAlternative() for that item.
+// All four pointers are only stored; the destructor releases nothing.
+class StringSetMonkey : public Monkey
+{
+public:
+ StringSetMonkey(const USet *theSet, UCollator *theCollator, StringToCEsMap *theCharsToCEList, CEToStringsMap *theCeToCharsStartingWith);
+ ~StringSetMonkey();
+
+ void append(UnicodeString &testCase, UnicodeString &alternate);
+
+private:
+ // Appends to alternate either a randomly assembled string whose CEs match
+ // those of testCase, or testCase itself when no such string is found.
+ UnicodeString &generateAlternative(const UnicodeString &testCase, UnicodeString &alternate);
+
+ const USet *set;
+ UCollator *coll;
+ StringToCEsMap *charsToCEList;
+ CEToStringsMap *ceToCharsStartingWith;
+};
+
+StringSetMonkey::StringSetMonkey(const USet *theSet, UCollator *theCollator, StringToCEsMap *theCharsToCEList, CEToStringsMap *theCeToCharsStartingWith)
+: Monkey(), set(theSet), coll(theCollator), charsToCEList(theCharsToCEList), ceToCharsStartingWith(theCeToCharsStartingWith)
+{
+ // ook.
+}
+
+StringSetMonkey::~StringSetMonkey()
+{
+ // ook?
+}
+
+// Picks a random item of the set. A range item (len == 0) contributes one
+// random code point of the range; a string item (len > 0) contributes the
+// whole string. The piece is appended to testCase, and
+// generateAlternative() appends its counterpart to alternate.
+void StringSetMonkey::append(UnicodeString &testCase, UnicodeString &alternate)
+{
+    int32_t itemCount = uset_getItemCount(set), len = 0;
+    int32_t index = m_rand() % itemCount;
+    UChar32 rangeStart = 0, rangeEnd = 0;
+    UChar buffer[16];
+    UErrorCode err = U_ZERO_ERROR;
+
+    len = uset_getItem(set, index, &rangeStart, &rangeEnd, buffer, 16, &err);
+
+    if (len < 0) {
+        // shouldn't happen...
+        return;
+    }
+
+    if (len > 0) {
+        // should check that len < 16...
+        UnicodeString piece(buffer, len);
+
+        testCase.append(piece);
+        generateAlternative(piece, alternate);
+        return;
+    }
+
+    // len == 0: choose one code point out of rangeStart..rangeEnd.
+    int32_t offset = m_rand() % (rangeEnd - rangeStart + 1);
+    UChar32 ch = rangeStart + offset;
+    UnicodeString piece(ch);
+
+    testCase.append(piece);
+    generateAlternative(piece, alternate);
+}
+
+// Appends to alternate a string whose CEs match those of testCase, built by
+// walking testCase's CE list and, at each position, drawing random
+// candidates registered for the current CE until one matches there.
+// Falls back to appending testCase itself when testCase has no CEs, when a
+// CE has no registered candidates, when more than stringCount + 1 draws
+// fail at one position, or when the assembled string's CEs do not match
+// after all. Returns alternate.
+UnicodeString &StringSetMonkey::generateAlternative(const UnicodeString &testCase, UnicodeString &alternate)
+{
+    CEList targetCEs(coll, testCase);
+    UnicodeString assembled;
+
+    if (targetCEs.size() == 0) {
+        return alternate.append(testCase);
+    }
+
+    for (int32_t offset = 0; offset < targetCEs.size(); ) {
+        const StringList *candidates = ceToCharsStartingWith->getStringList(targetCEs.get(offset));
+
+        if (candidates == NULL) {
+            return alternate.append(testCase);
+        }
+
+        const int32_t candidateCount = candidates->size();
+        const UnicodeString *picked = NULL;
+        const CEList *pickedCEs = NULL;
+
+        // find random string that generates the same CEList
+        for (int32_t tries = 0; ; tries += 1) {
+            // The draw happens before the give-up check, as the seed
+            // sequence depends on it.
+            int32_t s = m_rand() % candidateCount;
+
+            if (tries > candidateCount) {
+                return alternate.append(testCase);
+            }
+
+            picked = candidates->get(s);
+            pickedCEs = charsToCEList->get(picked);
+
+            if (targetCEs.matchesAt(offset, pickedCEs)) {
+                break;
+            }
+        }
+
+        assembled.append(*picked);
+        offset += pickedCEs->size();
+    }
+
+    const CEList assembledCEs(coll, assembled);
+
+    if (! targetCEs.matchesAt(0, &assembledCEs)) {
+        return alternate.append(testCase);
+    }
+
+    return alternate.append(assembled);
+}
+
+// Builds a random test string and its alternate. The first piece always
+// comes from monkeys[0]; it is followed by 1 to 4 pieces from randomly
+// chosen monkeys (the piece count is drawn once, before the first attempt).
+// Attempts are repeated until the CEs of alternate match the start of the
+// CEs of testCase.
+static void generateTestCase(UCollator *coll, Monkey *monkeys[], int32_t monkeyCount, UnicodeString &testCase, UnicodeString &alternate)
+{
+    const int32_t pieces = (m_rand() % 4) + 1;
+
+    for (;;) {
+        testCase.remove();
+        alternate.remove();
+        monkeys[0]->append(testCase, alternate);
+
+        int32_t remaining = pieces;
+
+        while (remaining > 0) {
+            int32_t monkey = m_rand() % monkeyCount;
+
+            monkeys[monkey]->append(testCase, alternate);
+            remaining -= 1;
+        }
+
+        const CEList ceTest(coll, testCase);
+        const CEList ceAlt(coll, alternate);
+
+        if (ceTest.matchesAt(0, &ceAlt)) {
+            return;
+        }
+    }
+}
+
+// Opens a USet from a range whose start (1) is greater than its end (0);
+// as the name says, the result is meant to be an empty set.
+static inline USet *uset_openEmpty()
+{
+ return uset_open(1, 0);
+}
+
+//
+// Find the next acceptable boundary following the specified starting index
+// in the target text being searched.
+// TODO: refine what is an acceptable boundary. For the moment,
+// choose the next position not within a combining sequence.
+//
+static int32_t nextBoundaryAfter(const UnicodeString &string, int32_t startIndex) {
+    const UChar *text = string.getBuffer();
+    int32_t textLen = string.length();
+
+    if (startIndex >= textLen) {
+        return startIndex;
+    }
+
+    // Step over the code point at startIndex.
+    UChar32 c;
+    int32_t boundary = startIndex;
+
+    U16_NEXT(text, boundary, textLen, c);
+
+    // Control characters (including CR and LF) do not combine, so the
+    // boundary is directly after them.
+    int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
+
+    switch (gcProperty) {
+    case U_GCB_CONTROL:
+    case U_GCB_LF:
+    case U_GCB_CR:
+        return boundary;
+
+    default:
+        break;
+    }
+
+    // Any other character can take trailing combining characters:
+    // advance past every Extend / SpacingMark code point that follows.
+    while (boundary < textLen) {
+        int32_t next = boundary;
+
+        U16_NEXT(text, next, textLen, c);
+        gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
+
+        if (gcProperty != U_GCB_EXTEND && gcProperty != U_GCB_SPACING_MARK) {
+            break;
+        }
+
+        boundary = next;
+    }
+
+    return boundary;
+}
+
+// Returns TRUE when index lies inside a combining sequence: the code
+// point at index is an Extend or SpacingMark character and the code point
+// before it is not a Control, CR or LF. Index 0 and indexes at or past
+// the end of the string are never inside a sequence.
+static UBool isInCombiningSequence(const UnicodeString &string, int32_t index) {
+    const UChar *text = string.getBuffer();
+    int32_t textLen = string.length();
+
+    if (index <= 0 || index >= textLen) {
+        return FALSE;
+    }
+
+    // The character at index has to be a combining mark.
+    UChar32 c;
+    U16_GET(text, 0, index, textLen, c);
+
+    int32_t gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
+    UBool isMark = (gcProperty == U_GCB_EXTEND || gcProperty == U_GCB_SPACING_MARK);
+
+    if (! isMark) {
+        return FALSE;
+    }
+
+    // The preceding character must be able to take a mark, i.e. must be
+    // anything except a CONTROL, CR or LF.
+    U16_PREV(text, 0, index, c);
+    gcProperty = u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK);
+
+    return gcProperty != U_GCB_CONTROL && gcProperty != U_GCB_LF && gcProperty != U_GCB_CR;
+}
+
+// Reference search: finds the first place where the collation orders of
+// pattern occur in the collation orders of target (taken from offset), and
+// where the match also starts and ends on acceptable boundaries.
+//
+// On success returns TRUE and stores the match limits in matchStart and
+// matchEnd. Returns FALSE with both set to -1 when nothing matches or the
+// character break iterator cannot be opened, and FALSE with both set to 0
+// when the pattern yields no orders.
+static UBool simpleSearch(UCollator *coll, const UnicodeString &target, int32_t offset, const UnicodeString &pattern, int32_t &matchStart, int32_t &matchEnd)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    OrderList targetOrders(coll, target, offset);
+    OrderList patternOrders(coll, pattern);
+    int32_t targetSize = targetOrders.size() - 1;
+    int32_t patternSize = patternOrders.size() - 1;
+
+    // Checked before the break iterator is opened, so that this early
+    // return has nothing to release.
+    if (patternSize == 0) {
+        matchStart = matchEnd = 0;
+        return FALSE;
+    }
+
+    matchStart = matchEnd = -1;
+
+    UBreakIterator *charBreakIterator = ubrk_open(UBRK_CHARACTER, ucol_getLocale(coll, ULOC_VALID_LOCALE, &status),
+                                                  target.getBuffer(), target.length(), &status);
+
+    if (U_FAILURE(status) || charBreakIterator == NULL) {
+        // Without a break iterator the boundary checks below cannot run.
+        if (charBreakIterator != NULL) {
+            ubrk_close(charBreakIterator);
+        }
+
+        return FALSE;
+    }
+
+    UBool found = FALSE;
+
+    for(int32_t i = 0; i < targetSize; i += 1) {
+        if (! targetOrders.matchesAt(i, patternOrders)) {
+            continue;
+        }
+
+        int32_t start    = targetOrders.getLowOffset(i);
+        int32_t maxLimit = targetOrders.getLowOffset(i + patternSize);
+        int32_t minLimit = targetOrders.getLowOffset(i + patternSize - 1);
+
+        // if the low and high offsets of the first CE in
+        // the match are the same, it means that the match
+        // starts in the middle of an expansion - all but
+        // the first CE of the expansion will have the offset
+        // of the following character.
+        if (start == targetOrders.getHighOffset(i)) {
+            continue;
+        }
+
+        // Make sure match starts on a grapheme boundary
+        if (! ubrk_isBoundary(charBreakIterator, start)) {
+            continue;
+        }
+
+        // If the low and high offsets of the CE after the match
+        // are the same, it means that the match ends in the middle
+        // of an expansion sequence.
+        if (maxLimit == targetOrders.getHighOffset(i + patternSize) &&
+            targetOrders.getOrder(i + patternSize) != UCOL_NULLORDER) {
+            continue;
+        }
+
+        int32_t mend = maxLimit;
+
+        // Find the first grapheme break after the character index
+        // of the last CE in the match. If it's after character index
+        // that's after the last CE in the match, use that index
+        // as the end of the match.
+        if (minLimit < maxLimit) {
+            int32_t nba = ubrk_following(charBreakIterator, minLimit);
+
+            if (nba >= targetOrders.getHighOffset(i + patternSize - 1)) {
+                mend = nba;
+            }
+        }
+
+        if (mend > maxLimit) {
+            continue;
+        }
+
+        if (! ubrk_isBoundary(charBreakIterator, mend)) {
+            continue;
+        }
+
+        matchStart = start;
+        matchEnd   = mend;
+        found      = TRUE;
+        break;
+    }
+
+    // Single release point for the iterator, whatever the outcome.
+    ubrk_close(charBreakIterator);
+    return found;
+}
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+// Looks for "<name> = <integer>" in params. When present, the integer is
+// returned and the whole "<name> = <integer>" text is removed from params,
+// so that the caller can detect leftover (unrecognized) options.
+// Returns defaultVal when the option is absent or a regex call failed.
+// name is taken by value because the regular expression is built in it.
+static int32_t getIntParam(UnicodeString name, UnicodeString &params, int32_t defaultVal) {
+    int32_t val = defaultVal;
+
+    name.append(" *= *(-?\\d+)");
+
+    UErrorCode status = U_ZERO_ERROR;
+    RegexMatcher m(name, params, 0, status);
+
+    if (m.find()) {
+        // The param exists. Convert the string to an int.
+        char valString[100];
+        int32_t paramLength = m.end(1, status) - m.start(1, status);
+
+        // Clamp so the digits always fit in valString.
+        if (paramLength >= (int32_t)(sizeof(valString)-1)) {
+            paramLength = (int32_t)(sizeof(valString)-2);
+        }
+
+        params.extract(m.start(1, status), paramLength, valString, sizeof(valString));
+        val = strtol(valString, NULL, 10);
+
+        // Delete this parameter from the params string.
+        m.reset();
+        params = m.replaceFirst("", status);
+    }
+
+    //U_ASSERT(U_SUCCESS(status));
+    if (! U_SUCCESS(status)) {
+        val = defaultVal;
+    }
+
+    return val;
+}
+#endif
+
+// Searches testCase first for pattern and then for altPattern, each time
+// comparing the first match reported by usearch with the one found by
+// simpleSearch(). A difference is reported through errln() together with
+// name, strength and seed. Returns how many of the two searches found
+// nothing in both implementations (0, 1 or 2).
+int32_t SSearchTest::monkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern,
+                                    const char *name, const char *strength, uint32_t seed)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t actualStart = -1, actualEnd = -1;
+    //int32_t expectedStart = prefix.length(), expectedEnd = prefix.length() + altPattern.length();
+    int32_t expectedStart = -1, expectedEnd = -1;
+    int32_t bothMissed = 0;
+    UStringSearch *uss = usearch_openFromCollator(pattern.getBuffer(), pattern.length(),
+                                                  testCase.getBuffer(), testCase.length(),
+                                                  coll,
+                                                  NULL,     // the break iterator
+                                                  &status);
+
+    // ---- pass 1: the pattern itself ----
+
+    // **** TODO: find *all* matches, not just first one ****
+    simpleSearch(coll, testCase, 0, pattern, expectedStart, expectedEnd);
+
+#if 0
+    usearch_search(uss, 0, &actualStart, &actualEnd, &status);
+#else
+    actualStart = usearch_next(uss, &status);
+    actualEnd   = actualStart + usearch_getMatchedLength(uss);
+#endif
+
+    if (! (actualStart == expectedStart && actualEnd == expectedEnd)) {
+        errln("Search for <pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"
+              " strength=%s seed=%d",
+              name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed);
+    }
+
+    if (expectedStart == -1 && actualStart == -1) {
+        bothMissed += 1;
+    }
+
+    // ---- pass 2: the alternate pattern, same searcher ----
+
+    // **** TODO: find *all* matches, not just first one ****
+    simpleSearch(coll, testCase, 0, altPattern, expectedStart, expectedEnd);
+
+    usearch_setPattern(uss, altPattern.getBuffer(), altPattern.length(), &status);
+
+#if 0
+    usearch_search(uss, 0, &actualStart, &actualEnd, &status);
+#else
+    usearch_reset(uss);
+    actualStart = usearch_next(uss, &status);
+    actualEnd   = actualStart + usearch_getMatchedLength(uss);
+#endif
+
+    if (! (actualStart == expectedStart && actualEnd == expectedEnd)) {
+        errln("Search for <alt_pattern> in <%s> failed: expected [%d, %d], got [%d, %d]\n"
+              " strength=%s seed=%d",
+              name, expectedStart, expectedEnd, actualStart, actualEnd, strength, seed);
+    }
+
+    if (expectedStart == -1 && actualStart == -1) {
+        bothMissed += 1;
+    }
+
+    usearch_close(uss);
+
+    return bothMissed;
+}
+
+// Randomized ("monkey") test: generates random patterns, prefixes and
+// suffixes and checks, through monkeyTestCase(), that usearch and
+// simpleSearch() agree on the pattern alone, prefix + pattern,
+// prefix + pattern + suffix and pattern + suffix.
+//
+// params may hold the options "loop=<n>" (iterations per strength,
+// default 1000 in quick mode and 10000 otherwise), "seed=<n>" (initial
+// random seed) and "strength=primary|secondary|tertiary" (test only that
+// strength). Anything else in params is reported as an error and the test
+// is not run.
+void SSearchTest::monkeyTest(char *params)
+{
+    // ook!
+    UErrorCode status = U_ZERO_ERROR;
+    U_STRING_DECL(test_pattern, "[[:assigned:]-[:ideographic:]-[:hangul:]-[:c:]]", 47);
+    U_STRING_INIT(test_pattern, "[[:assigned:]-[:ideographic:]-[:hangul:]-[:c:]]", 47);
+    UCollator *coll = ucol_open(NULL, &status);
+    USet *charsToTest  = uset_openPattern(test_pattern, 47, &status);
+    USet *expansions   = uset_openEmpty();
+    USet *contractions = uset_openEmpty();
+    StringToCEsMap *charsToCEList = new StringToCEsMap();
+    CEToStringsMap *ceToCharsStartingWith = new CEToStringsMap();
+
+    ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status);
+
+    uset_addAll(charsToTest, contractions);
+    uset_addAll(charsToTest, expansions);
+
+    // TODO: set strength to UCOL_PRIMARY, change CEList to use strength?
+    buildData(coll, charsToTest, charsToCEList, ceToCharsStartingWith);
+
+    U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
+    U_STRING_INIT(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
+    USet *letters = uset_openPattern(letter_pattern, 39, &status);
+    SetMonkey letterMonkey(letters);
+    StringSetMonkey contractionMonkey(contractions, coll, charsToCEList, ceToCharsStartingWith);
+    StringSetMonkey expansionMonkey(expansions, coll, charsToCEList, ceToCharsStartingWith);
+    UnicodeString testCase;
+    UnicodeString pattern, altPattern;
+    UnicodeString prefix, altPrefix;
+    UnicodeString suffix, altSuffix;
+
+    // Contraction and expansion monkeys are listed several times so that
+    // a random pick favours them over the plain letter monkey.
+    Monkey *monkeys[] = {
+        &letterMonkey,
+        &contractionMonkey,
+        &expansionMonkey,
+        &contractionMonkey,
+        &expansionMonkey,
+        &contractionMonkey,
+        &expansionMonkey,
+        &contractionMonkey,
+        &expansionMonkey};
+    int32_t monkeyCount = sizeof(monkeys) / sizeof(monkeys[0]);
+
+    UCollationStrength strengths[] = {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY};
+    const char *strengthNames[] = {"primary", "secondary", "tertiary"};
+    int32_t strengthCount = sizeof(strengths) / sizeof(strengths[0]);
+    int32_t loopCount = quick? 1000 : 10000;
+    int32_t firstStrength = 0;
+    int32_t lastStrength  = strengthCount - 1;
+    UBool paramsValid = TRUE;
+
+    if (params != NULL) {
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+        UnicodeString p(params);
+
+        loopCount = getIntParam("loop", p, loopCount);
+        m_seed    = getIntParam("seed", p, m_seed);
+
+        RegexMatcher m(" *strength *= *(primary|secondary|tertiary) *", p, 0, status);
+        if (m.find()) {
+            UnicodeString breakType = m.group(1, status);
+
+            for (int32_t s = 0; s < strengthCount; s += 1) {
+                if (breakType == strengthNames[s]) {
+                    firstStrength = lastStrength = s;
+                    break;
+                }
+            }
+
+            m.reset();
+            p = m.replaceFirst("", status);
+        }
+
+        if (RegexMatcher("\\S", p, 0, status).find()) {
+            // Each option is stripped out of the option string as it is processed.
+            // All options have been checked.  The option string should have been completely emptied..
+            char buf[100];
+            p.extract(buf, sizeof(buf), NULL, status);
+            buf[sizeof(buf)-1] = 0;
+            errln("Unrecognized or extra parameter:  %s\n", buf);
+
+            // Skip the test, but still fall through to the cleanup below
+            // instead of returning with everything still allocated.
+            paramsValid = FALSE;
+        }
+#else
+        infoln("SSearchTest built with UCONFIG_NO_REGULAR_EXPRESSIONS: ignoring parameters.");
+#endif
+    }
+
+    for(int32_t s = firstStrength; paramsValid && s <= lastStrength; s += 1) {
+        int32_t notFoundCount = 0;
+
+        ucol_setStrength(coll, strengths[s]);
+
+        // TODO: try alternate prefix and suffix too?
+        // TODO: alternates are only equal at primary strength. Is this OK?
+        // Honour loopCount (quick mode / "loop=" option) rather than a fixed count.
+        for(int32_t t = 0; t < loopCount; t += 1) {
+            // Remember the seed this iteration starts from, so that a
+            // failure can be reproduced with "seed=".
+            uint32_t seed = m_seed;
+
+            generateTestCase(coll, monkeys, monkeyCount, pattern, altPattern);
+            generateTestCase(coll, monkeys, monkeyCount, prefix,  altPrefix);
+            generateTestCase(coll, monkeys, monkeyCount, suffix,  altSuffix);
+
+            // pattern
+            notFoundCount += monkeyTestCase(coll, pattern, pattern, altPattern, "pattern", strengthNames[s], seed);
+
+            testCase.remove();
+            testCase.append(prefix);
+            testCase.append(/*alt*/pattern);
+
+            // prefix + pattern
+            notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern", strengthNames[s], seed);
+
+            testCase.append(suffix);
+
+            // prefix + pattern + suffix
+            notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern + suffix", strengthNames[s], seed);
+
+            testCase.remove();
+            testCase.append(pattern);
+            testCase.append(suffix);
+
+            // pattern + suffix
+            notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "pattern + suffix", strengthNames[s], seed);
+        }
+
+        logln("For strength %s the not found count is %d.", strengthNames[s], notFoundCount);
+    }
+
+    delete ceToCharsStartingWith;
+    delete charsToCEList;
+
+    uset_close(letters);
+    uset_close(contractions);
+    uset_close(expansions);
+    uset_close(charsToTest);
+
+    ucol_close(coll);
+}
+
+
Added: trunk/source/test/intltest/ssearch.h
===================================================================
--- trunk/source/test/intltest/ssearch.h (rev 0)
+++ trunk/source/test/intltest/ssearch.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -0,0 +1,40 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2008, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ */
+
+#ifndef __SSEARCH_H
+#define __SSEARCH_H
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/ucol.h"
+
+#include "intltest.h"
+
+//
+// Test of the function usearch_search()
+//
+// See srchtest.h for the tests for the rest of the string search functions.
+//
+// IntlTest suite for string search.
+class SSearchTest: public IntlTest {
+public:
+
+ SSearchTest();
+ virtual ~SSearchTest();
+
+ virtual void runIndexedTest(int32_t index, UBool exec, const char* &name, char* params = NULL );
+
+ virtual void searchTest();
+ virtual void offsetTest();
+ // Randomized test; params is an optional option string (may be NULL).
+ virtual void monkeyTest(char *params);
+
+private:
+ virtual const char *getPath(char buffer[2048], const char *filename);
+ // Runs one monkey test case; name, strength and seed are only used in
+ // failure messages. Returns a count of searches that found nothing.
+ virtual int32_t monkeyTestCase(UCollator *coll, const UnicodeString &testCase, const UnicodeString &pattern, const UnicodeString &altPattern,
+ const char *name, const char *strength, uint32_t seed);
+};
+
+#endif
Modified: trunk/source/test/intltest/tscoll.cpp
===================================================================
--- trunk/source/test/intltest/tscoll.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/intltest/tscoll.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2007, International Business Machines Corporation and
+ * Copyright (c) 1997-2008, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@@ -42,6 +42,7 @@
#include "normconf.h"
#include "thcoll.h"
#include "srchtest.h"
+#include "ssearch.h"
#include "cntabcol.h"
#include "lcukocol.h"
#include "ucaconf.h"
@@ -49,6 +50,9 @@
#include "cmemory.h"
//#include "rndmcoll.h"
+// Set to 1 to test offsets in backAndForth()
+#define TEST_OFFSETS 0
+
#define TESTCLASS(n,classname) \
case n: \
name = #classname; \
@@ -89,6 +93,7 @@
TESTCLASS(19, CollationServiceTest);
TESTCLASS(20, CollationFinnishTest); // removed by weiv - we have changed Finnish collation
//TESTCLASS(21, RandomCollatorTest); // See ticket 5747 about reenabling this test.
+ TESTCLASS(21, SSearchTest);
default: name = ""; break;
}
@@ -392,7 +397,7 @@
{
// Run through the iterator forwards and stick it into an array
int32_t orderLength = 0;
- int32_t *orders = getOrders(iter, orderLength);
+ Order *orders = getOrders(iter, orderLength);
UErrorCode status = U_ZERO_ERROR;
// Now go through it backwards and make sure we get the same values
@@ -404,6 +409,8 @@
while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
{
+ int32_t offset = iter.getOffset();
+
if (index == 0) {
if(o == 0) {
continue;
@@ -411,28 +418,39 @@
// going backwards
errln("Backward iteration returned a non ignorable after orders are exhausted");
break;
+ }
}
- }
- if (o != orders[--index])
- {
+
+ index -= 1;
+ if (o != orders[index].order) {
if (o == 0)
- index ++;
- else
- {
- while (index > 0 && orders[--index] == 0)
- {
+ index += 1;
+ else {
+ while (index > 0 && orders[--index].order == 0) {
+ // nothing...
}
- if (o != orders[index])
- {
- errln("Mismatch at index %d: 0x%X vs 0x%X", index,
- orders[index], o);
- break;
+
+ if (o != orders[index].order) {
+ errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index,
+ orders[index].order, o);
+ //break;
+ goto bail;
}
}
}
+
+#if TEST_OFFSETS
+ if (offset != orders[index].offset) {
+ errln("Mismatched offset at index %d: %d vs. %d", index,
+ orders[index].offset, offset);
+ //break;
+ goto bail;
+ }
+#endif
+
}
- while (index != 0 && orders[index - 1] == 0)
+ while (index != 0 && orders[index - 1].order == 0)
{
index --;
}
@@ -466,6 +484,7 @@
errln("");
}
+bail:
delete[] orders;
}
@@ -474,12 +493,13 @@
* Return an integer array containing all of the collation orders
* returned by calls to next on the specified iterator
*/
-int32_t *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength)
+IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength)
{
int32_t maxSize = 100;
int32_t size = 0;
- int32_t *orders = new int32_t[maxSize];
+ Order *orders = new Order[maxSize];
UErrorCode status = U_ZERO_ERROR;
+ int32_t offset = iter.getOffset();
int32_t order;
while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
@@ -487,21 +507,25 @@
if (size == maxSize)
{
maxSize *= 2;
- int32_t *temp = new int32_t[maxSize];
+ Order *temp = new Order[maxSize];
- uprv_memcpy(temp, orders, size * sizeof(int32_t));
+ uprv_memcpy(temp, orders, size * sizeof(Order));
delete[] orders;
orders = temp;
}
- orders[size++] = order;
+ orders[size].order = order;
+ orders[size].offset = offset;
+
+ offset = iter.getOffset();
+ size += 1;
}
if (maxSize > size)
{
- int32_t *temp = new int32_t[size];
+ Order *temp = new Order[size];
- uprv_memcpy(temp, orders, size * sizeof(int32_t));
+ uprv_memcpy(temp, orders, size * sizeof(Order));
delete[] orders;
orders = temp;
}
Modified: trunk/source/test/intltest/tscoll.h
===================================================================
--- trunk/source/test/intltest/tscoll.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/intltest/tscoll.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2003, International Business Machines Corporation and
+ * Copyright (c) 1997-2008, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@@ -23,6 +23,12 @@
class IntlTestCollator: public IntlTest {
void runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL );
protected:
+ struct Order
+ {
+ int32_t order;
+ int32_t offset;
+ };
+
// These two should probably go down in IntlTest
void doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result);
@@ -42,7 +48,7 @@
* Return an integer array containing all of the collation orders
* returned by calls to next on the specified iterator
*/
- int32_t *getOrders(CollationElementIterator &iter, int32_t &orderLength);
+ Order *getOrders(CollationElementIterator &iter, int32_t &orderLength);
UCollationResult compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status);
};
Modified: trunk/source/test/intltest/tztest.cpp
===================================================================
--- trunk/source/test/intltest/tztest.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/intltest/tztest.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -612,7 +612,7 @@
{"EAT", 180, FALSE},
{"MET", 60, TRUE}, // updated 12/3/99 aliu
{"NET", 240, TRUE}, // updated 12/3/99 aliu
- {"PLT", 300, FALSE}, // updated Aug 2003 aliu
+ {"PLT", 300, TRUE}, // updated by 2008c
{"IST", 330, FALSE},
{"BST", 360, FALSE},
{"VST", 420, FALSE},
Modified: trunk/source/test/letest/gendata.xml
===================================================================
--- trunk/source/test/letest/gendata.xml 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/letest/gendata.xml 2008-05-31 14:31:31 UTC (rev 153)
@@ -113,4 +113,19 @@
<test-font name="BMIN00M.TTF"/>
<test-text>中華人民共和國 臺灣</test-text>
</test-case>
+
+ <test-case id="Telugu Syllable Boundary Test" script="telu">
+ <test-font name="gautami.ttf"/>
+ <test-text>ప్రకాష్</test-text>
+ </test-case>
+
+ <test-case id="Angsana New Mark Test" script="thai">
+ <test-font name="ANGSA.TTF"/>
+ <test-text>บทที่๑พายุไซโคลนโดโรธีอาศัยอยู่ท่ามกลางทุ่งใหญ่ในแคนซัสกับลุงเฮนรีชาวไร่และป้าเอ็มภรรยาชาวไร่บ้านของพวกเขาหลังเล็กเพราะไม้สร้างบ้านต้องขนมาด้วยเกวียนเป็นระยะทางหลายไมล์</test-text>
+ </test-case>
+
+ <test-case id="Sinhala Al-Lakuna Test" script="sinh">
+ <test-font name="lklug.hj.ttf"/>
+ <test-text>ක්රෙ ක්යෙ ක්ෂෙ ක්ෂ්යෙ ක්ෂෙ කර්මෙ ස්ට්රේ ස්සෙ ස්ස</test-text>
+ </test-case>
</layout-tests>
\ No newline at end of file
Modified: trunk/source/test/testdata/DataDrivenCollationTest.txt
===================================================================
--- trunk/source/test/testdata/DataDrivenCollationTest.txt 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/testdata/DataDrivenCollationTest.txt 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,4 +1,4 @@
-// Copyright (c) 2001-2007 International Business Machines
+// Copyright (c) 2001-2008 International Business Machines
// Corporation and others. All Rights Reserved.
DataDrivenCollationTest:table(nofallback) {
Info {
@@ -513,6 +513,25 @@
"xj<xSx<xș=xş<xȘ=xŞ<Xș=Xş<XȘ=XŞ<xșx=xşx<xȘx=xŞx<xT<xTx<xț=xţ<xȚ=xŢ<Xț=Xţ<XȚ"
"=XŢ<xțx=xţx<xȚx=xŢx<xU"
}
- }
+ }
+
+ testOffsets {
+ Info {
+ Description { "This tests cases where forwards and backwards iteration get different offsets" }
+ }
+
+ Settings {
+ {
+ TestLocale { "en" }
+ Arguments { "[strength 3]" }
+ }
+ }
+
+ Cases {
+ "a\uD800\uDC00\uDC00<b\uD800\uDC00\uDC00",
+ "\u0301A\u0301\u0301<\u0301B\u0301\u0301",
+ "abcd\r\u0301<abce\r\u0301"
+ }
+ }
}
}
Modified: trunk/source/test/testdata/Makefile.in
===================================================================
--- trunk/source/test/testdata/Makefile.in 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/testdata/Makefile.in 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,6 +1,6 @@
#******************************************************************************
#
-# Copyright (C) 1998-2007, International Business Machines
+# Copyright (C) 1998-2008, International Business Machines
# Corporation and others. All Rights Reserved.
#
#******************************************************************************
@@ -23,6 +23,8 @@
SUBDIRS =
+-include Makefile.local
+
## List of phony targets
.PHONY : all all-local all-recursive install install-local \
install-recursive clean clean-local clean-recursive distclean \
@@ -123,9 +125,10 @@
# import the shared .mk file
include $(TESTSRCDATADIR)/tstfiles.mk
+-include $(TESTSRCDATADIR)/tstlocal.mk
# TEST_RES_SOURCE comes from tstfiles.mk
-TEST_RES = $(TEST_RES_SOURCE) casing.txt mc.txt root.txt sh.txt sh_YU.txt te.txt te_IN.txt te_IN_REVISED.txt testtypes.txt testaliases.txt testempty.txt structLocale.txt idna_rules.txt conversion.txt icuio.txt testtable32.txt
+TEST_RES = $(TEST_RES_SOURCE) $(TEST_RES_LOCAL) casing.txt mc.txt root.txt sh.txt sh_YU.txt te.txt te_IN.txt te_IN_REVISED.txt testtypes.txt testaliases.txt testempty.txt structLocale.txt idna_rules.txt conversion.txt icuio.txt testtable32.txt
TEST_RES_FILES=$(TEST_RES:%.txt=$(TESTBUILDDIR)/%.res) $(TESTBUILDDIR)/iscii.res $(TESTBUILDDIR)/icu26_testtypes.res $(TESTBUILDDIR)/icu26e_testtypes.res
ALL_TEST_FILES = $(TEST_DAT_FILES) $(TEST_SPP_FILES) $(TEST_BRK_FILES) $(TEST_CNV_FILES) $(TEST_RES_FILES) $(TESTOUTDIR)/$(TESTDT)/nam.typ
Modified: trunk/source/test/testdata/letest.xml
===================================================================
--- trunk/source/test/testdata/letest.xml 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/test/testdata/letest.xml 2008-05-31 14:31:31 UTC (rev 153)
@@ -8,7 +8,7 @@
UNLESS YOU REALLY KNOW WHAT YOU'RE DOING.
file name: letest.xml
- generated on: 03/18/2008 11:44:15 AM Hawaiian Standard Time
+ generated on: 05/27/2008 03:13:34 PM Hawaiian Standard Time
generated by: gendata.cpp
-->
@@ -1287,4 +1287,169 @@
</result-positions>
</test-case>
+ <test-case id="Telugu Syllable Boundary Test" script="telu">
+ <test-font name="gautami.ttf" version="Version 1.21" checksum="0xAA71FEB6"/>
+
+ <test-text>ప్రకాష్</test-text>
+
+ <result-glyphs>
+ 0x00000057, 0x00000194, 0x0000FFFF, 0x00000125, 0x00000066, 0x00000241, 0x0000FFFF
+ </result-glyphs>
+
+ <result-indices>
+ 0x00000000, 0x00000002, 0x00000001, 0x00000003, 0x00000004, 0x00000005, 0x00000006
+ </result-indices>
+
+ <result-positions>
+ 0.000000, 0.000000, 8.384766, 0.000000, 8.285156, 0.000000, 8.285156, 0.000000,
+ 14.894531, 0.000000, 18.527344, 0.000000, 26.812500, 0.000000, 26.812500, 0.000000
+ </result-positions>
+ </test-case>
+
+ <test-case id="Angsana New Mark Test" script="thai">
+ <test-font name="ANGSA.TTF" version="Version 2.30" checksum="0xE5962FC6"/>
+
+ <test-text>บทที่๑พายุไซโคลนโดโรธีอาศัยอยู่ท่ามกลางทุ่งใหญ่ในแคนซัสกับลุงเฮนรีชาวไร่และป้าเอ็มภรรยาชาวไร่บ้านของพวกเขาหลังเล็กเพราะไม้สร้างบ้านต้องขนมาด้วยเกวียนเป็นระยะทางหลายไมล์</test-text>
+
+ <result-glyphs>
+ 0x0000009D, 0x0000009A, 0x0000009A, 0x000000B8, 0x000000C9, 0x000000D2, 0x000000A1, 0x000000B5,
+ 0x000000A5, 0x000000BB, 0x000000C5, 0x0000008E, 0x000000C3, 0x00000087, 0x000000A8, 0x0000009C,
+ 0x000000C3, 0x00000097, 0x000000C3, 0x000000A6, 0x0000009B, 0x000000B8, 0x000000B0, 0x000000B5,
+ 0x000000AB, 0x000000B4, 0x000000A5, 0x000000B0, 0x000000A5, 0x000000BC, 0x0000006E, 0x0000009A,
+ 0x0000006E, 0x000000B5, 0x000000A4, 0x00000084, 0x000000A8, 0x000000B5, 0x0000008A, 0x0000009A,
+ 0x000000BB, 0x0000006E, 0x0000008A, 0x000000C4, 0x000000AE, 0x00000090, 0x0000006E, 0x000000C4,
+ 0x0000009C, 0x000000C2, 0x00000087, 0x0000009C, 0x0000008E, 0x000000B4, 0x000000AD, 0x00000084,
+ 0x000000B4, 0x0000009D, 0x000000A8, 0x000000BB, 0x0000008A, 0x000000C1, 0x000000B1, 0x0000009C,
+ 0x000000A6, 0x000000B8, 0x0000008D, 0x000000B5, 0x000000AA, 0x000000C5, 0x000000A6, 0x0000006E,
+ 0x000000C2, 0x000000A8, 0x000000B3, 0x0000009E, 0x0000006A, 0x000000B5, 0x000000C1, 0x000000B0,
+ 0x000000C8, 0x000000A4, 0x000000A3, 0x000000A6, 0x000000A6, 0x000000A5, 0x000000B5, 0x0000008D,
+ 0x000000B5, 0x000000AA, 0x000000C5, 0x000000A6, 0x0000006E, 0x0000009D, 0x0000006F, 0x000000B5,
+ 0x0000009C, 0x00000085, 0x000000B0, 0x0000008A, 0x000000A1, 0x000000AA, 0x00000084, 0x000000C1,
+ 0x00000085, 0x000000B5, 0x000000AE, 0x000000A8, 0x000000B4, 0x0000008A, 0x000000C1, 0x000000A8,
+ 0x000000C8, 0x00000084, 0x000000C1, 0x000000A1, 0x000000A6, 0x000000B5, 0x000000B3, 0x000000C5,
+ 0x000000A4, 0x0000006F, 0x000000AD, 0x000000A6, 0x0000006F, 0x000000B5, 0x0000008A, 0x0000009D,
+ 0x0000006F, 0x000000B5, 0x0000009C, 0x00000098, 0x0000006F, 0x000000B0, 0x0000008A, 0x00000085,
+ 0x0000009C, 0x000000A4, 0x000000B5, 0x00000097, 0x0000006F, 0x000000AA, 0x000000A5, 0x000000C1,
+ 0x00000084, 0x000000AA, 0x000000B8, 0x000000A5, 0x0000009C, 0x000000C1, 0x0000009E, 0x0000007D,
+ 0x0000009C, 0x000000A6, 0x000000B3, 0x000000A5, 0x000000B3, 0x0000009A, 0x000000B5, 0x0000008A,
+ 0x000000AE, 0x000000A8, 0x000000B5, 0x000000A5, 0x000000C5, 0x000000A4, 0x000000A8, 0x00000072
+ </result-glyphs>
+
+ <result-indices>
+ 0x00000000, 0x00000001, 0x00000002, 0x00000003, 0x00000004, 0x00000005, 0x00000006, 0x00000007,
+ 0x00000008, 0x00000009, 0x0000000A, 0x0000000B, 0x0000000C, 0x0000000D, 0x0000000E, 0x0000000F,
+ 0x00000010, 0x00000011, 0x00000012, 0x00000013, 0x00000014, 0x00000015, 0x00000016, 0x00000017,
+ 0x00000018, 0x00000019, 0x0000001A, 0x0000001B, 0x0000001C, 0x0000001D, 0x0000001E, 0x0000001F,
+ 0x00000020, 0x00000021, 0x00000022, 0x00000023, 0x00000024, 0x00000025, 0x00000026, 0x00000027,
+ 0x00000028, 0x00000029, 0x0000002A, 0x0000002B, 0x0000002C, 0x0000002D, 0x0000002E, 0x0000002F,
+ 0x00000030, 0x00000031, 0x00000032, 0x00000033, 0x00000034, 0x00000035, 0x00000036, 0x00000037,
+ 0x00000038, 0x00000039, 0x0000003A, 0x0000003B, 0x0000003C, 0x0000003D, 0x0000003E, 0x0000003F,
+ 0x00000040, 0x00000041, 0x00000042, 0x00000043, 0x00000044, 0x00000045, 0x00000046, 0x00000047,
+ 0x00000048, 0x00000049, 0x0000004A, 0x0000004B, 0x0000004C, 0x0000004D, 0x0000004E, 0x0000004F,
+ 0x00000050, 0x00000051, 0x00000052, 0x00000053, 0x00000054, 0x00000055, 0x00000056, 0x00000057,
+ 0x00000058, 0x00000059, 0x0000005A, 0x0000005B, 0x0000005C, 0x0000005D, 0x0000005E, 0x0000005F,
+ 0x00000060, 0x00000061, 0x00000062, 0x00000063, 0x00000064, 0x00000065, 0x00000066, 0x00000067,
+ 0x00000068, 0x00000069, 0x0000006A, 0x0000006B, 0x0000006C, 0x0000006D, 0x0000006E, 0x0000006F,
+ 0x00000070, 0x00000071, 0x00000072, 0x00000073, 0x00000074, 0x00000075, 0x00000076, 0x00000077,
+ 0x00000078, 0x00000079, 0x0000007A, 0x0000007B, 0x0000007C, 0x0000007D, 0x0000007E, 0x0000007F,
+ 0x00000080, 0x00000081, 0x00000082, 0x00000083, 0x00000084, 0x00000085, 0x00000086, 0x00000087,
+ 0x00000088, 0x00000089, 0x0000008A, 0x0000008B, 0x0000008C, 0x0000008D, 0x0000008E, 0x0000008F,
+ 0x00000090, 0x00000091, 0x00000092, 0x00000093, 0x00000094, 0x00000095, 0x00000096, 0x00000097,
+ 0x00000098, 0x00000099, 0x0000009A, 0x0000009B, 0x0000009C, 0x0000009D, 0x0000009E, 0x0000009F,
+ 0x000000A0, 0x000000A1, 0x000000A2, 0x000000A3, 0x000000A4, 0x000000A5, 0x000000A6, 0x000000A7
+ </result-indices>
+
+ <result-positions>
+ 0.000000, 0.000000, 5.399414, 0.000000, 10.798828, 0.000000, 16.198242, 0.000000,
+ 16.198242, 0.000000, 16.198242, 0.000000, 21.046875, 0.000000, 26.616211, 0.000000,
+ 30.035156, 0.000000, 34.151367, 0.000000, 34.151367, 0.000000, 38.279297, 0.000000,
+ 43.558594, 0.000000, 47.663086, 0.000000, 52.438477, 0.000000, 57.178711, 0.000000,
+ 62.698242, 0.000000, 66.802734, 0.000000, 71.601563, 0.000000, 75.706055, 0.000000,
+ 79.810547, 0.000000, 84.369141, 0.000000, 84.369141, 0.000000, 89.097656, 0.000000,
+ 92.516602, 0.000000, 97.195313, 0.000000, 97.195313, 0.000000, 101.311523, 0.000000,
+ 106.040039, 0.000000, 110.156250, 0.000000, 110.156250, 0.000000, 110.156250, 0.000000,
+ 115.555664, 0.000000, 115.555664, 0.000000, 118.974609, 0.000000, 124.013672, 0.000000,
+ 128.765625, 0.000000, 133.505859, 0.000000, 136.924805, 0.000000, 140.704102, 0.000000,
+ 146.103516, 0.000000, 146.103516, 0.000000, 146.103516, 0.000000, 149.882813, 0.000000,
+ 153.553711, 0.000000, 159.158203, 0.000000, 165.421875, 0.000000, 165.421875, 0.000000,
+ 169.092773, 0.000000, 174.612305, 0.000000, 179.135742, 0.000000, 183.911133, 0.000000,
+ 189.430664, 0.000000, 194.709961, 0.000000, 194.709961, 0.000000, 199.989258, 0.000000,
+ 204.741211, 0.000000, 204.741211, 0.000000, 210.140625, 0.000000, 214.880859, 0.000000,
+ 214.880859, 0.000000, 218.660156, 0.000000, 220.675781, 0.000000, 225.128906, 0.000000,
+ 230.648438, 0.000000, 234.752930, 0.000000, 234.752930, 0.000000, 239.613281, 0.000000,
+ 243.032227, 0.000000, 247.280273, 0.000000, 251.408203, 0.000000, 255.512695, 0.000000,
+ 255.512695, 0.000000, 260.036133, 0.000000, 264.776367, 0.000000, 269.071289, 0.000000,
+ 274.470703, 0.000000, 274.470703, 0.000000, 277.889648, 0.000000, 279.905273, 0.000000,
+ 284.633789, 0.000000, 284.633789, 0.000000, 289.672852, 0.000000, 294.641602, 0.000000,
+ 298.746094, 0.000000, 302.850586, 0.000000, 306.966797, 0.000000, 310.385742, 0.000000,
+ 315.246094, 0.000000, 318.665039, 0.000000, 322.913086, 0.000000, 327.041016, 0.000000,
+ 331.145508, 0.000000, 331.145508, 0.000000, 336.544922, 0.000000, 336.544922, 0.000000,
+ 339.963867, 0.000000, 345.483398, 0.000000, 350.258789, 0.000000, 354.987305, 0.000000,
+ 358.766602, 0.000000, 364.335938, 0.000000, 368.583984, 0.000000, 373.335938, 0.000000,
+ 375.351563, 0.000000, 380.126953, 0.000000, 383.545898, 0.000000, 389.150391, 0.000000,
+ 393.890625, 0.000000, 393.890625, 0.000000, 397.669922, 0.000000, 399.685547, 0.000000,
+ 404.425781, 0.000000, 404.425781, 0.000000, 409.177734, 0.000000, 411.193359, 0.000000,
+ 416.762695, 0.000000, 420.867188, 0.000000, 424.286133, 0.000000, 428.581055, 0.000000,
+ 432.708984, 0.000000, 437.748047, 0.000000, 437.748047, 0.000000, 443.027344, 0.000000,
+ 447.131836, 0.000000, 447.131836, 0.000000, 450.550781, 0.000000, 454.330078, 0.000000,
+ 459.729492, 0.000000, 459.729492, 0.000000, 463.148438, 0.000000, 468.667969, 0.000000,
+ 473.478516, 0.000000, 473.478516, 0.000000, 478.207031, 0.000000, 481.986328, 0.000000,
+ 486.761719, 0.000000, 492.281250, 0.000000, 497.320313, 0.000000, 500.739258, 0.000000,
+ 505.538086, 0.000000, 505.538086, 0.000000, 509.786133, 0.000000, 513.902344, 0.000000,
+ 515.917969, 0.000000, 520.669922, 0.000000, 524.917969, 0.000000, 524.917969, 0.000000,
+ 529.034180, 0.000000, 534.553711, 0.000000, 536.569336, 0.000000, 541.968750, 0.000000,
+ 541.968750, 0.000000, 547.488281, 0.000000, 551.592773, 0.000000, 555.887695, 0.000000,
+ 560.003906, 0.000000, 564.298828, 0.000000, 569.698242, 0.000000, 573.117188, 0.000000,
+ 576.896484, 0.000000, 582.500977, 0.000000, 587.241211, 0.000000, 590.660156, 0.000000,
+ 594.776367, 0.000000, 598.904297, 0.000000, 603.943359, 0.000000, 608.683594, 0.000000,
+ 608.683594, 0.000000
+ </result-positions>
+ </test-case>
+
+ <test-case id="Sinhala Al-Lakuna Test" script="sinh">
+ <test-font name="lklug.hj.ttf" version="Version 0.3 " checksum="0x2A8B3DA2"/>
+
+ <test-text>ක්රෙ ක්යෙ ක්ෂෙ ක්ෂ්යෙ ක්ෂෙ කර්මෙ ස්ට්රේ ස්සෙ ස්ස</test-text>
+
+ <result-glyphs>
+ 0x0000004A, 0x000001D3, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000003, 0x0000004A, 0x00000018,
+ 0x00000089, 0x0000FFFF, 0x0000FFFF, 0x00000003, 0x0000004A, 0x00000088, 0x0000FFFF, 0x0000FFFF,
+ 0x0000FFFF, 0x00000003, 0x0000004A, 0x00000088, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000089,
+ 0x0000FFFF, 0x0000FFFF, 0x00000003, 0x000001D4, 0x0000FFFF, 0x0000004A, 0x0000003C, 0x00000003,
+ 0x00000018, 0x0000004A, 0x000001F6, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000003, 0x000000A7,
+ 0x0000FFFF, 0x0000004A, 0x00000078, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000003,
+ 0x0000004A, 0x00000201, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000003, 0x000000A7, 0x0000FFFF,
+ 0x0000003D
+ </result-glyphs>
+
+ <result-indices>
+ 0x00000004, 0x00000000, 0x00000001, 0x00000002, 0x00000003, 0x00000005, 0x0000000A, 0x00000006,
+ 0x00000007, 0x00000008, 0x00000009, 0x0000000B, 0x00000010, 0x0000000C, 0x0000000D, 0x0000000E,
+ 0x0000000F, 0x00000011, 0x00000019, 0x00000012, 0x00000013, 0x00000014, 0x00000015, 0x00000016,
+ 0x00000017, 0x00000018, 0x0000001A, 0x0000001B, 0x0000001C, 0x0000001E, 0x0000001D, 0x0000001F,
+ 0x00000020, 0x00000025, 0x00000021, 0x00000022, 0x00000023, 0x00000024, 0x00000026, 0x00000027,
+ 0x00000028, 0x0000002D, 0x00000029, 0x0000002A, 0x0000002B, 0x0000002C, 0x0000002D, 0x0000002E,
+ 0x00000033, 0x0000002F, 0x00000030, 0x00000031, 0x00000032, 0x00000034, 0x00000035, 0x00000036,
+ 0x00000037
+ </result-indices>
+
+ <result-positions>
+ 0.000000, 0.000000, 8.520000, 0.000000, 19.224001, 0.000000, 19.224001, 0.000000,
+ 19.224001, 0.000000, 19.224001, 0.000000, 26.640001, 0.000000, 35.160004, 0.000000,
+ 45.864006, 0.000000, 51.936005, 0.000000, 51.936005, 0.000000, 51.936005, 0.000000,
+ 59.352005, 0.000000, 67.872009, 0.000000, 82.704010, 0.000000, 82.704010, 0.000000,
+ 82.704010, 0.000000, 82.704010, 0.000000, 90.120010, 0.000000, 98.640015, 0.000000,
+ 113.472015, 0.000000, 113.472015, 0.000000, 113.472015, 0.000000, 113.472015, 0.000000,
+ 119.544014, 0.000000, 119.544014, 0.000000, 119.544014, 0.000000, 126.960014, 0.000000,
+ 137.664017, 0.000000, 137.664017, 0.000000, 146.184021, 0.000000, 154.296021, 0.000000,
+ 161.712021, 0.000000, 172.416016, 0.000000, 180.936020, 0.000000, 189.552017, 0.000000,
+ 189.552017, 0.000000, 189.552017, 0.000000, 189.552017, 0.000000, 196.968018, 0.000000,
+ 205.584015, 0.000000, 205.584015, 0.000000, 214.104019, 0.000000, 222.720016, 0.000000,
+ 222.720016, 0.000000, 222.720016, 0.000000, 222.720016, 0.000000, 222.720016, 0.000000,
+ 230.136017, 0.000000, 238.656021, 0.000000, 254.784027, 0.000000, 254.784027, 0.000000,
+ 254.784027, 0.000000, 254.784027, 0.000000, 262.200012, 0.000000, 270.816010, 0.000000,
+ 270.816010, 0.000000, 279.432007, 0.000000
+ </result-positions>
+ </test-case>
+
</layout-tests>
Added: trunk/source/test/testdata/ssearch.xml
===================================================================
--- trunk/source/test/testdata/ssearch.xml (rev 0)
+++ trunk/source/test/testdata/ssearch.xml 2008-05-31 14:31:31 UTC (rev 153)
@@ -0,0 +1,413 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!-- Copyright (c) 2007-2008 IBM Corporation and others. All rights reserved -->
+
+<!-- Test data file for string search -->
+<!DOCTYPE stringsearch-tests [
+<!ELEMENT stringsearch-tests (test-case+)>
+<!ATTLIST stringsearch-tests debug IDREF #IMPLIED >
+<!ELEMENT test-case (pattern, pre?, m?, post?)>
+<!ATTLIST test-case
+ id ID #REQUIRED
+ locale CDATA "en"
+ strength (PRIMARY | SECONDARY | TERTIARY | QUATERNARY | IDENTICAL) "TERTIARY"
+ norm (ON | OFF) "OFF"
+ >
+
+<!ELEMENT pattern (#PCDATA)>
+<!ELEMENT pre (#PCDATA)>
+<!ELEMENT m (#PCDATA)>
+<!ELEMENT post (#PCDATA)>
+]>
+
+<stringsearch-tests debug="test32">
+ <!-- debug="test11" (for copying into the above element) -->
+
+ <!-- Very simple match -->
+ <test-case id="test01" >
+ <pattern>abc</pattern>
+ <pre>xxx</pre><m>abc</m><post>yyy</post>
+ </test-case>
+
+ <!-- Very simple no-match -->
+ <test-case id="test02" >
+ <pattern>abc</pattern>
+ <pre>xxx</pre><post>yyy</post>
+ </test-case>
+
+ <!-- Match after several near-misses. -->
+ <test-case id="test03" >
+ <pattern>string</pattern>
+ <pre>silly spring stling strxng strilg strinx stri</pre><m>string</m><post> fling</post>
+ </test-case>
+
+ <test-case id="test04" strength="PRIMARY" >
+ <pattern>FUSS</pattern>
+ <pre>abc</pre><m>fuss</m><post>sss</post>
+ </test-case>
+
+ <test-case id="test05" strength="PRIMARY" >
+ <pattern>FUSS</pattern>
+ <pre>abc</pre><m>fuß</m><post>sss</post>
+ </test-case>
+
+ <test-case id="test05.5" strength="PRIMARY" >
+ <pattern>fuss</pattern>
+ <pre>a </pre>
+ <m>fuß</m>
+ <post>ball table</post>
+ </test-case>
+
+ <test-case id="test06" strength="PRIMARY" >
+ <pattern>fuß</pattern>
+ <pre>abc</pre><m>fuss</m><post>xyz</post>
+ </test-case>
+
+ <test-case id="test07" strength="SECONDARY" >
+ <pattern>fuß</pattern>
+ <pre>abcfussxyz</pre>
+ </test-case>
+
+ <test-case id="test08" strength="PRIMARY" >
+ <pattern>fus</pattern>
+ <pre>abcfuß</pre><post>xyz</post>
+ </test-case>
+
+ <!-- A good match following an initial match that failed because
+ of not ending on a character boundary -->
+ <test-case id="test09" strength="PRIMARY">
+ <pattern>fus</pattern>
+ <pre>fuß </pre><m>fus</m><post>sss</post>
+ </test-case>
+
+
+ <!-- Test cases from usrchdat.c BREAKITERATOREXACT -->
+
+ <test-case id="test10" strength="TERTIARY">
+ <pattern>fox</pattern>
+ <m>fox</m><post>y fox</post>
+ </test-case>
+
+ <test-case id="test11" strength="PRIMARY" locale="de_DE@collation=phonebook">
+ <pattern>toe</pattern>
+ <pre>This is a </pre><m>Tö</m><post>ne</post>
+ </test-case>
+
+ <test-case id="test11a" strength="SECONDARY" locale="de_DE@collation=phonebook">
+ <pattern>toe</pattern>
+ <pre>This is a </pre><post>Töne</post>
+ </test-case>
+
+ <test-case id="test12" strength="TERTIARY">
+ <pattern>e</pattern>
+ <pre>tésting that é doés not match </pre><m>e</m><post></post>
+ </test-case>
+
+ <test-case id="test13" strength="PRIMARY" locale="fr">
+ <pattern>e</pattern>
+ <pre></pre><m>É</m><post>É</post>
+ </test-case>
+
+ <test-case id="test14" strength="PRIMARY" locale="fr">
+ <pattern>O</pattern>
+ <pre>C</pre><m>O\u0302</m><post>TÉ</post>
+ </test-case>
+
+
+ <!-- Test cases from usrchdat.c STRENGTH -->
+
+
+ <test-case id="test15" strength="PRIMARY" locale="en">
+ <pattern>fox</pattern>
+ <pre>The quick brown </pre><m>fox</m><post> jumps over the lazy foxes</post>
+ </test-case>
+
+ <test-case id="test16" strength="PRIMARY" locale="fr">
+ <pattern>peche</pattern>
+ <pre>blackbirds pat </pre><m>p\u00E9ch\u00E9</m><post> </post>
+ </test-case>
+
+ <test-case id="test17" strength="PRIMARY" locale="fr">
+ <pattern>peche</pattern>
+ <pre>blackbirds pat </pre><m>p\u00EAche</m><post> </post>
+ </test-case>
+
+ <test-case id="test18" strength="PRIMARY" locale="fr">
+ <pattern>peche</pattern>
+ <pre>blackbirds pat </pre><m>p\u00E9che</m><post>r </post>
+ </test-case>
+
+ <test-case id="test19" strength="PRIMARY" locale="fr">
+ <pattern>peche</pattern>
+ <pre>blackbirds pat </pre><m>p\u00EAche</m><post>r </post>
+ </test-case>
+
+ <test-case id="test20" strength="PRIMARY" locale="es">
+ <pattern>channel</pattern>
+ <pre>A </pre><m>channel</m><post>, </post>
+ </test-case>
+
+ <test-case id="test21" strength="PRIMARY" locale="es">
+ <pattern>channel</pattern>
+ <pre>A </pre><m>CHANNEL</m><post>, </post>
+ </test-case>
+
+ <test-case id="test22" strength="PRIMARY" locale="es">
+ <pattern>channel</pattern>
+ <pre>A </pre><m>Channel</m><post>s, </post>
+ </test-case>
+
+ <test-case id="test23" strength="PRIMARY" locale="es">
+ <pattern>channel</pattern>
+ <pre>A </pre><m>channel</m><post>... </post>
+ </test-case>
+
+ <test-case id="test24" strength="TERTIARY" locale="en">
+ <pattern>A\u0300</pattern>
+ <pre>A miss, and then </pre><m>\u00c0</m><post> should match but not A"</post>
+ </test-case>
+
+ <!-- TODO: In the original test data, this test matched at IDENTICAL strength.
+ Doesn't seem right. The characters are different.
+ -->
+ <test-case id="test24a" strength="IDENTICAL" locale="en">
+ <pattern>A\u0300</pattern>
+ <pre>At IDENTICAL, shoud this match? </pre><m>\u00c0</m><post></post>
+ </test-case>
+
+ <test-case id="test25" strength="SECONDARY" locale="en">
+ <pattern>Ű</pattern>
+ <pre>12</pre><m>ű</m><post> Ű</post>
+ </test-case>
+
+ <test-case id="test26" strength="SECONDARY" locale="en">
+ <pattern>A</pattern>
+ <pre>12</pre><m>a</m><post>...</post>
+ </test-case>
+
+
+ <!-- Test Cases from usrchdat.c, VARIABLE -->
+ <test-case id="test27" strength="TERTIARY" locale="en">
+ <pattern>blackbird</pattern>
+ <pre>black-bird </pre><m>blackbird</m><post>...</post>
+ </test-case>
+
+ <test-case id="test28" strength="TERTIARY" locale="en">
+ <pattern>go</pattern>
+ <pre> on</pre>
+ </test-case>
+
+ <!-- TODO: this gives an U_ILLEGAL_ARGUMENT error when opening
+ the UStringSearch. How did the orignal test run? -->
+ <!--
+ <test-case id="test29" strength="PRIMARY" locale="en">
+ <pattern> </pattern>
+ <pre></pre><m></m><post>abc</post>
+ </test-case>
+ -->
+
+ <test-case id="test30" strength="SECONDARY" locale="en">
+ <pattern>abc</pattern>
+ <pre> a bc ab c a bc ab c"</pre>
+ </test-case>
+
+ <test-case id="test31" strength="SECONDARY" locale="en">
+ <pattern>abc</pattern>
+ <pre> ---------------</pre>
+ </test-case>
+
+
+ <!-- Normalization test cases from usrchdat.c -->
+ <test-case id="test32" strength="TERTIARY" norm="ON">
+ <pattern>a\u0325\u0300</pattern>
+ <pre></pre><m>a\u0300\u0325</m>
+ </test-case>
+
+
+ <test-case id="test32a" strength="TERTIARY" norm="OFF">
+ <pattern>a\u0325\u0300</pattern>
+ <pre>a\u0300\u0325</pre>
+ </test-case>
+
+
+ <!-- COMPOSITEBOUNDARIES from usrchdat.c
+ Boundaries are not identical to orignal test data because
+ of matching only full combining sequences
+ -->
+ <test-case id="test40" strength="TERTIARY">
+ <pattern>A</pattern>
+ <pre>À</pre> <!-- \u00C0 -->
+ </test-case>
+
+ <test-case id="test41" strength="TERTIARY">
+ <pattern>A</pattern>
+ <pre>À</pre><m>A</m><post>C</post>
+ </test-case>
+
+ <test-case id="test42" strength="TERTIARY">
+ <pattern>A\u030A</pattern>
+ <pre>À\u01FA</pre>
+ </test-case>
+
+
+
+ <!-- SUPPLEMENTARYCANONICAL from usrchdat.c -->
+ <test-case id="test50" strength="TERTIARY">
+ <pattern>\uD800\uDC00</pattern>
+ <pre>abc \uD802\uDC00 \uD800\uDC01 \uD801\uDC00 </pre><m>\uD800\uDC00</m>
+ <post>abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00</post>
+ </test-case>
+
+ <test-case id="test51" strength="TERTIARY">
+ <pattern>\\uD834\\uDDB9</pattern>
+ <pre>and</pre><m>\\uD834\\uDDB9</m><post>this sentence</post>
+ </test-case>
+
+ <test-case id="test52" strength="TERTIARY">
+ <pattern> \\uD834\\uDDB9 </pattern>
+ <pre>and</pre><m> \\uD834\\uDDB9 </m><post>this sentence</post>
+ </test-case>
+
+ <test-case id="test53" strength="TERTIARY">
+ <pattern>-\\uD834\\uDDB9-</pattern>
+ <pre>and</pre><m>-\\uD834\\uDDB9-</m><post>this sentence</post>
+ </test-case>
+
+ <test-case id="test54" strength="TERTIARY">
+ <pattern>,\\uD834\\uDDB9,</pattern>
+ <pre>and</pre><m>,\\uD834\\uDDB9,</m><post>this sentence</post>
+ </test-case>
+
+ <test-case id="test55" strength="TERTIARY">
+ <pattern>?\\uD834\\uDDB9?</pattern>
+ <pre>and</pre><m>?\\uD834\\uDDB9?</m><post>this sentence</post>
+ </test-case>
+
+
+ <!-- Long combining sequences -->
+ <test-case id="test60" strength="PRIMARY">
+ <pattern>A\u0301\u0301\u0301\u0301</pattern>
+ <m>A\u0301\u0301\u0301\u0301\u0301</m>
+ </test-case>
+
+ <test-case id="test61" strength="TERTIARY">
+ <pattern>A\u0301\u0301\u0301\u0301</pattern>
+ <pre>A\u0301\u0301\u0301\u0301\u0301</pre>
+ </test-case>
+
+ <test-case id="test62" strength="TERTIARY">
+ <pattern>A\u0301\u0301\u0301\u0301</pattern>
+ <m>A\u0301\u0301\u0301\u0301</m>
+ </test-case>
+
+ <!-- stand-alone combining marks don't match attached marks -->
+ <test-case id="test63" strength="TERTIARY">
+ <pattern>\u0301</pattern>
+ <pre>A\u0301\u0301\u0301\u0301</pre>
+ </test-case>
+
+ <test-case id="test64" strength="TERTIARY">
+ <pattern>\u0301</pattern>
+ <post>\u0301\u0301\u0301\u0301</post>
+ </test-case>
+
+ <!-- stand-alone combining mark does match an un-attached combining mark -->
+ <test-case id="test65" strength="TERTIARY">
+ <pattern>\u0301</pattern>
+ <m>\u0301</m><post>A\u0301\u0301</post>
+ </test-case>
+
+ <test-case id="test66" strength="TERTIARY">
+ <pattern>\u0301</pattern>
+ <m>\u0301</m>
+ </test-case>
+
+ <!-- stand-alone combining marks at end of the target text -->
+ <test-case id="test67" strength="TERTIARY">
+ <pattern>\u0301</pattern>
+ <pre>abcd\r</pre><m>\u0301</m>
+ </test-case>
+
+ <!-- attached combining marks at end of the target text, no match -->
+ <test-case id="test68" strength="TERTIARY">
+ <pattern>\u0301</pattern>
+ <pre>abcd\u0301</pre>
+ </test-case>
+
+
+
+ <!-- no match within expansions at the start -->
+ <test-case id="test70" strength="PRIMARY">
+ <pattern>Eligature</pattern>
+ <pre>Æligature</pre>
+ </test-case>
+
+ <test-case id="test71" strength="PRIMARY">
+ <pattern>AEligature</pattern>
+ <m>Æligature</m>
+ </test-case>
+
+ <test-case id="test72" strength="PRIMARY">
+ <pattern>AEligature</pattern>
+ <m>Æligature</m>
+ </test-case>
+
+ <!-- unattached combining Tilde will not match a Tilde that is
+ part of a composed Ñ (\u00D1) -->
+ <test-case id="test73" strength="SECONDARY">
+ <pattern>\u0303</pattern> <!-- combining tilde -->
+ <pre>Ñ
</pre><m>\u0303</m>
+ </test-case>
+
+ <test-case id="test74" strength="SECONDARY">
+ <pattern>\u0303</pattern> <!-- combining tilde -->
+ <pre>Ñ 
</pre><m>\u0303</m><post>a</post>
+ </test-case>
+
+ <test-case id="test75" strength="TERTIARY" locale="fr">
+ <pattern>\u00EA</pattern>
+ <pre>p</pre><m>\u00EA</m><post>che</post>
+ </test-case>
+
+ <test-case id="test76" strength="TERTIARY" locale="fr">
+ <pattern>\u00EA</pattern>
+ <pre>p</pre><m>e\u0302</m><post>che</post>
+ </test-case>
+
+ <test-case id="test77" strength="TERTIARY" locale="fr">
+ <pattern>e\u0302</pattern>
+ <pre>p</pre><m>\u00EA</m><post>che</post>
+ </test-case>
+
+ <!-- Test cases from ticket:5382 -->
+ <test-case id="test78" strength="SECONDARY" locale="hu_HU">
+ <pattern>\u0170</pattern>
+ <m>\u0171</m>
+ <post>12</post>
+ </test-case>
+
+ <test-case id="test79" strength="SECONDARY" locale="hu_HU">
+ <pattern>\u0170</pattern>
+ <pre>1</pre>
+ <m>\u0171</m>
+ <post>2</post>
+ </test-case>
+
+ <test-case id="test80" strength="SECONDARY" locale="hu_HU">
+ <pattern>\u0170</pattern>
+ <pre>12</pre>
+ <m>\u0171</m>
+ </test-case>
+
+ <!-- Test cases from ticket:5959 -->
+ <test-case id="test81" strength="SECONDARY">
+ <pattern>\u2166</pattern>
+ <m>VII</m>
+ </test-case>
+
+ <test-case id="test82" strength="SECONDARY">
+ <pattern>VII</pattern>
+ <m>\u2166</m>
+ </test-case>
+</stringsearch-tests>
+
Added: trunk/source/test/testdata/test1bmp.ucm
===================================================================
--- trunk/source/test/testdata/test1bmp.ucm (rev 0)
+++ trunk/source/test/testdata/test1bmp.ucm 2008-05-31 14:31:31 UTC (rev 153)
@@ -0,0 +1,39 @@
+# *******************************************************************************
+# * Copyright (C) 2007, International Business Machines
+# * Corporation and others. All Rights Reserved.
+# *******************************************************************************
+#
+# test1.ucm
+#
+# Test file for MBCS conversion with single-byte codepage data.
+# BMP-only, to test optimized code path.
+# Includes extensions.
+
+<code_set_name> "test1bmp"
+<mb_cur_max> 1
+<mb_cur_min> 1
+<uconv_class> "MBCS"
+<subchar> \xff
+<icu:state> 0, 5-9, ff
+
+CHARMAP
+
+# fromUnicode result is zero byte from other than U+0000
+<U0040> \x00 |0
+<U0061> \x00 |1
+
+# nothing special
+<U0065> \x05 |0
+
+# toUnicode result is fallback direct
+<U0066> \x06 |3
+
+#unassigned \x09
+
+# 1:2 mapping
+<U0074> \x07\x09 |0
+
+# 2:1 mapping
+<U0075><U0076> \x08 |0
+
+END CHARMAP
Modified: trunk/source/tools/genrb/read.c
===================================================================
--- trunk/source/tools/genrb/read.c 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/tools/genrb/read.c 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1998-2003, International Business Machines
+* Copyright (C) 1998-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -340,30 +340,30 @@
return c;
}
- c = ucbuf_getc(buf,status);
+ c = ucbuf_getc(buf,status); /* "/c" */
if (c == U_EOF) {
return U_EOF;
}
switch (c) {
- case SLASH:
+ case SLASH: /* "//" */
seekUntilNewline(buf, NULL, status);
break;
- case ASTERISK:
- c2 = ucbuf_getc(buf, status);
- if(c2== ASTERISK){
+ case ASTERISK: /* "/*" */
+ c2 = ucbuf_getc(buf, status); /* "/*c" */
+ if(c2 == ASTERISK){ /* "/**" */
/* parse multi-line comment and store it in token*/
seekUntilEndOfComment(buf, token, status);
- }else{
- ucbuf_ungetc(c, buf);
+ } else {
+ ucbuf_ungetc(c2, buf); /* c2 is the non-asterisk following "/*". Include c2 back in buffer. */
seekUntilEndOfComment(buf, NULL, status);
}
break;
default:
- ucbuf_ungetc(c, buf);
+ ucbuf_ungetc(c, buf); /* "/c" - put back the c */
/* If get() failed this is a NOP */
return SLASH;
}
Modified: trunk/source/tools/toolutil/ucbuf.c
===================================================================
--- trunk/source/tools/toolutil/ucbuf.c 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/tools/toolutil/ucbuf.c 2008-05-31 14:31:31 UTC (rev 153)
@@ -538,21 +538,14 @@
/* decrement currentPos pointer
* if not at the beginning of buffer
*/
- UChar escaped[8] ={'\0'};
- int32_t len =0;
- if(c > 0xFFFF){
- len = uprv_itou(escaped,8,c,16,8);
- }else{
- len=uprv_itou(escaped,8,c,16,4);
- }
if(buf->currentPos!=buf->buffer){
if(*(buf->currentPos-1)==c){
buf->currentPos--;
- }else if(u_strncmp(buf->currentPos-len,escaped,len) == 0){
- while(--len>0){
- buf->currentPos--;
- }
+ } else {
+ /* ungetc failed - did not match. */
}
+ } else {
+ /* ungetc failed - beginning of buffer. */
}
}
Modified: trunk/source/tools/toolutil/ucbuf.h
===================================================================
--- trunk/source/tools/toolutil/ucbuf.h 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/tools/toolutil/ucbuf.h 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1998-2005, International Business Machines
+* Copyright (C) 1998-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -138,7 +138,7 @@
ucbuf_close(UCHARBUF* buf);
/**
- * Rewinds the buffer by one codepoint
+ * Rewinds the buffer by one codepoint. Does not rewind over escaped characters.
*/
U_CAPI void U_EXPORT2
ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);
Modified: trunk/source/tools/toolutil/xmlparser.cpp
===================================================================
--- trunk/source/tools/toolutil/xmlparser.cpp 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/tools/toolutil/xmlparser.cpp 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2004-2006, International Business Machines
+* Copyright (C) 2004-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -74,10 +74,15 @@
// XML Doctype decl production #28
// example "<!DOCTYPE foo SYSTEM "somewhere" >
+ // or "<!DOCTYPE foo [internal dtd]>
// TODO: we don't actually parse the DOCTYPE or internal subsets.
// Some internal dtd subsets could confuse this simple-minded
- // attempt at skipping over them.
- mXMLDoctype(UnicodeString("(?s)<!DOCTYPE.+?>"), 0, status),
+ // attempt at skipping over them, specifically, occurrences
+ // of closing square brackets. These could appear in comments,
+ // or in parameter entity declarations, for example.
+ mXMLDoctype(UnicodeString(
+ "(?s)<!DOCTYPE.*?(>|\\[.*?\\].*?>)"
+ ), 0, status),
// XML PI production #16
// example "<?target stuff?>
Modified: trunk/source/tools/tzcode/icuzones
===================================================================
--- trunk/source/tools/tzcode/icuzones 2008-05-25 07:57:38 UTC (rev 152)
+++ trunk/source/tools/tzcode/icuzones 2008-05-31 14:31:31 UTC (rev 153)
@@ -1,5 +1,5 @@
######################################################################
-# Copyright (C) 2007, International Business Machines
+# Copyright (C) 2007-2008, International Business Machines
# Corporation and others. All Rights Reserved.
######################################################################
# This is an ICU-specific file with the same format as regular
@@ -57,7 +57,7 @@
####Link EST America/New_York EST # Defined as -05:00
####Link Pacific/Honolulu HST # Defined as -10:00
Link America/Indiana/Indianapolis IET
-Link Asia/Calcutta IST
+Link Asia/Kolkata IST
Link Asia/Tokyo JST
#Link Asia/Tehran MET # MET is a standard UNIX zone
Link Pacific/Apia MIT
@@ -70,4 +70,4 @@
Link America/Los_Angeles PST
Link Pacific/Guadalcanal SST
#Link Etc/UTC UTC # Olson LINK
-Link Asia/Saigon VST
+Link Asia/Ho_Chi_Minh VST
More information about the sword-cvs
mailing list