[sword-cvs] icu-sword/source/test/testdata conversion.txt,NONE,1.1 nfs4_cis_prep.txt,NONE,1.1 nfs4_cs_prep_ci.txt,NONE,1.1 nfs4_cs_prep_cs.txt,NONE,1.1 nfs4_mixed_prep_p.txt,NONE,1.1 nfs4_mixed_prep_s.txt,NONE,1.1 ra.txt,NONE,1.1 riwords.txt,NONE,1.1 test4x.ucm,NONE,1.1 CollationTest_NON_IGNORABLE_STUB.txt,1.1,1.2 CollationTest_SHIFTED_STUB.txt,1.1,1.2 DataDrivenCollationTest.txt,1.1,1.2 idna_rules.txt,1.1,1.2 rbbitst.txt,1.1,1.2 regextst.txt,1.1,1.2 te.txt,1.3,1.4 test1.ucm,1.3,1.4 test3.ucm,1.3,1.4 test4.ucm,1.3,1.4 testaliases.txt,1.1,1.2 testdata.mk,1.4,1.5 testtypes.txt,1.4,1.5 translit_rules.txt,1.1,1.2 th18057.txt,1.5,NONE
sword@www.crosswire.org
sword@www.crosswire.org
Tue, 6 Apr 2004 03:11:19 -0700
- Previous message: [sword-cvs] icu-sword/source/common/unicode symtable.h,NONE,1.1 usprep.h,NONE,1.1 utrace.h,NONE,1.1 brkiter.h,1.1,1.2 caniter.h,1.1,1.2 chariter.h,1.4,1.5 dbbi.h,1.1,1.2 locid.h,1.4,1.5 normlzr.h,1.5,1.6 parsepos.h,1.1,1.2 platform.h.in,1.5,1.6 pos400.h,1.3,1.4 putil.h,1.4,1.5 pwin32.h,1.5,1.6 rbbi.h,1.1,1.2 rep.h,1.4,1.5 resbund.h,1.4,1.5 schriter.h,1.4,1.5 strenum.h,1.1,1.2 ubrk.h,1.1,1.2 uchar.h,1.5,1.6 uchriter.h,1.4,1.5 uclean.h,1.4,1.5 ucnv.h,1.4,1.5 ucnv_err.h,1.4,1.5 uenum.h,1.1,1.2 uidna.h,1.1,1.2 uiter.h,1.1,1.2 uloc.h,1.4,1.5 umachine.h,1.4,1.5 umisc.h,1.3,1.4 unifilt.h,1.1,1.2 unifunct.h,1.1,1.2 unimatch.h,1.1,1.2 uniset.h,1.1,1.2 unistr.h,1.5,1.6 unorm.h,1.4,1.5 uobject.h,1.1,1.2 urename.h,1.5,1.6 ures.h,1.5,1.6 uscript.h,1.5,1.6 uset.h,1.1,1.2 usetiter.h,1.1,1.2 ustring.h,1.4,1.5 utf.h,1.4,1.5 utf16.h,1.3,1.4 utf8.h,1.4,1.5 utypes.h,1.9,1.10 uversion.h,1.5,1.6
- Next message: [sword-cvs] icu-sword/debian rules,1.5,1.6
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvs/core/icu-sword/source/test/testdata
In directory www:/tmp/cvs-serv8911/source/test/testdata
Modified Files:
CollationTest_NON_IGNORABLE_STUB.txt
CollationTest_SHIFTED_STUB.txt DataDrivenCollationTest.txt
idna_rules.txt rbbitst.txt regextst.txt te.txt test1.ucm
test3.ucm test4.ucm testaliases.txt testdata.mk testtypes.txt
translit_rules.txt
Added Files:
conversion.txt nfs4_cis_prep.txt nfs4_cs_prep_ci.txt
nfs4_cs_prep_cs.txt nfs4_mixed_prep_p.txt
nfs4_mixed_prep_s.txt ra.txt riwords.txt test4x.ucm
Removed Files:
th18057.txt
Log Message:
ICU 2.8 sync
--- NEW FILE: conversion.txt ---
//*******************************************************************************
//
// Copyright (C) 2003, International Business Machines
// Corporation and others. All Rights Reserved.
//
// file name: conversion.txt
// encoding: US-ASCII
// tab size: 8 (not used)
// indentation:4
//
// created on: 2003jul15
// created by: Markus W. Scherer
//
// ICU resource bundle source file with test data for data-driven conversion tests.
//
//*******************************************************************************
conversion {
Info {
Description { "Test data for conversion" }
LongDescription {
"Test data for data-driven conversion tests in icu/source/test/intltest/convtest.cpp\n"
"Run intltest conversion\n"
"Charset names starting with '*' are for testdata names.\n"
"ICU callbacks are specified as strings with pairs of characters, each optional.\n"
"Callback function - '?'=Sub '0'=Skip '.'=Stop '&'=Escape\n"
"Callback option - a letter is passed in directly as const char * see ucnv_err.h\n"
"Empty string: Sub callback with NULL option\n"
"In order to specify a charset substitution character,\n"
"add a NUL (U+0000) to the callback string followed by the subchar bytes as Latin-1\n"
"characters. For example, for a Sub callback with no option and a subchar of FC FC,\n"
"use the string \"?\x00\xFC\xFC\"\n"
"fallbacks: per-direction boolean, currently only for fromUnicode; see Jitterbug 2401\n"
"errorCode: (empty)==zero | invalid | illegal | truncated | illesc | unsuppesc\n"
}
}
TestData {
toUnicode {
Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" }
Cases {
// test that ISO-2022-JP encodes ASCII as itself
{
"ISO-2022-JP",
:bin{ 3f4041424344454647 },
"?@ABCDEFG",
:intvector{ 0,1,2,3,4,5,6,7,8 },
:int{1}, :int{1}, "", "?", :bin{""}
}
// test that ISO-2022-CN encodes ASCII as itself
{
"ISO-2022-CN",
:bin{ 3f4041424344454647 },
"?@ABCDEFG",
:intvector{ 0,1,2,3,4,5,6,7,8 },
:int{1}, :int{1}, "", "?", :bin{""}
}
// ISO-2022-KR
// truncated, partial escape sequence
{
"ibm-25546",
:bin{ 1b }, "", :intvector{},
:int{1}, :int{1}, "truncated", ".", :bin{ 1b }
}
{
"ibm-25546",
:bin{ 1b24 }, "", :intvector{},
:int{1}, :int{1}, "truncated", ".", :bin{ 1b24 }
}
{
"ibm-25546",
:bin{ 1b2429 }, "", :intvector{},
:int{1}, :int{1}, "truncated", ".", :bin{ 1b2429 }
}
// complete escape sequence but nothing else
{
"ibm-25546",
:bin{ 1b242943 }, "", :intvector{},
:int{1}, :int{1}, "", ".", :bin{""}
}
{
"ibm-25546",
:bin{ 1b2429430e }, "", :intvector{},
:int{1}, :int{1}, "", ".", :bin{""}
}
// escape plus ASCII character
{
"ibm-25546",
:bin{ 1b24294341 }, "A", :intvector{ 4 },
:int{1}, :int{1}, "", ".", :bin{""}
}
// escape plus incomplete DBCS character
{
"ibm-25546",
:bin{ 1b2429430e41 }, "", :intvector{},
:int{1}, :int{1}, "truncated", ".", :bin{ 41 }
}
// all complete with DBCS character
{
"ibm-25546",
:bin{ 1b2429430e4141 }, "\uc88b", :intvector{ 5 },
:int{1}, :int{1}, "", ".", :bin{""}
}
// more complicated example
{
"ibm-25546",
:bin{ 411b242943420e4141affe0f43 },
"AB\uc88b%XAF%XFEC",
:intvector{ 0, 5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 12 },
:int{1}, :int{1}, "", "&", :bin{""}
}
// truncated, partial escape sequence
{
"ISO-2022-KR",
:bin{ 1b }, "", :intvector{},
:int{1}, :int{1}, "truncated", ".", :bin{ 1b }
}
{
"ISO-2022-KR",
:bin{ 1b24 }, "", :intvector{},
:int{1}, :int{1}, "truncated", ".", :bin{ 1b24 }
}
{
"ISO-2022-KR",
:bin{ 1b2429 }, "", :intvector{},
:int{1}, :int{1}, "truncated", ".", :bin{ 1b2429 }
}
// complete escape sequence but nothing else
{
"ISO-2022-KR",
:bin{ 1b242943 }, "", :intvector{},
:int{1}, :int{1}, "", ".", :bin{""}
}
{
"ISO-2022-KR",
:bin{ 1b2429430e }, "", :intvector{},
:int{1}, :int{1}, "", ".", :bin{""}
}
// escape plus ASCII character
{
"ISO-2022-KR",
:bin{ 1b24294341 }, "A", :intvector{ 4 },
:int{1}, :int{1}, "", ".", :bin{""}
}
// escape plus incomplete DBCS character
{
"ISO-2022-KR",
:bin{ 1b2429430e41 }, "", :intvector{},
:int{1}, :int{1}, "truncated", ".", :bin{ 41 }
}
// all complete with DBCS character
{
"ISO-2022-KR",
:bin{ 1b2429430e4141 }, "\uc88b", :intvector{ 5 },
:int{1}, :int{1}, "", ".", :bin{""}
}
// more complicated example
{
"ISO-2022-KR",
:bin{ 411b242943420e4141affe0f43 },
"AB\uc88b%XAF%XFEC",
:intvector{ 0, 5, 7, 9, 9, 9, 9, 9, 9, 9, 9, 12 },
:int{1}, :int{1}, "", "&", :bin{""}
}
// ISO-2022-JP
// truncated, partial escape sequence
{
"ISO-2022-JP",
:bin{ 1b }, "", :intvector{},
:int{1}, :int{1}, "truncated", ".", :bin{ 1b }
}
{
"ISO-2022-JP-2",
:bin{ 1b24 }, "", :intvector{},
:int{1}, :int{1}, "truncated", ".", :bin{ 1b24 }
}
// complete escape sequence but nothing else
{
"ISO-2022-JP-2",
:bin{ 1b2442 }, "", :intvector{},
:int{1}, :int{1}, "", ".", :bin{""}
}
// escape plus incomplete DBCS character
{
"ISO-2022-JP-2",
:bin{ 1b244241 }, "", :intvector{},
:int{1}, :int{1}, "truncated", ".", :bin{ 41 }
}
// all complete with DBCS character
{
"ISO-2022-JP-2",
:bin{ 1b24424141 }, "\u758f", :intvector{ 3 },
:int{1}, :int{1}, "", ".", :bin{""}
}
// test the G2 designator & SS2 shift
{
"ISO-2022-JP-2",
:bin{ 431b2e46461b244241411b4e4e353f }, "CF\u758f\u039e\u7591", :intvector{ 0, 4, 8, 12, 13 },
:int{1}, :int{1}, "", ".", :bin{""}
}
// JIS7 with Katakana
{
"JIS7",
:bin{ 41420e41420f4142 }, "AB\uff81\uff82AB", :intvector{ 0, 1, 3, 4, 6, 7 },
:int{1}, :int{1}, "", ".", :bin{""}
}
// JIS8 with Katakana
{
"JIS8",
:bin{ 41c15c1b284a5cc242 }, "A\uff81\\\xa5\uff82B", :intvector{ 0, 1, 2, 6, 7, 8 },
:int{1}, :int{1}, "", ".", :bin{""}
}
// ISO-2022-CN
// truncated, partial escape sequence
{
"ISO_2022,locale=zh,version=1",
:bin{ 1b }, "", :intvector{},
:int{1}, :int{1}, "truncated", ".", :bin{ 1b }
}
{
"ISO_2022,locale=zh,version=1",
:bin{ 1b24 }, "", :intvector{},
:int{1}, :int{1}, "truncated", ".", :bin{ 1b24 }
}
{
"ISO_2022,locale=zh,version=1",
:bin{ 1b2429 }, "", :intvector{},
:int{1}, :int{1}, "truncated", ".", :bin{ 1b2429 }
}
// complete escape sequence but nothing else
{
"ISO_2022,locale=zh,version=1",
:bin{ 1b242941 }, "", :intvector{},
:int{1}, :int{1}, "", ".", :bin{""}
}
{
"ISO_2022,locale=zh,version=1",
:bin{ 1b2429410e }, "", :intvector{},
:int{1}, :int{1}, "", ".", :bin{""}
}
// escape plus ASCII character
{
"ISO_2022,locale=zh,version=1",
:bin{ 1b24294141 }, "\x41", :intvector{ 4 },
:int{1}, :int{1}, "", ".", :bin{""}
}
// escape plus incomplete DBCS character
{
"ISO_2022,locale=zh,version=1",
:bin{ 1b2429410e41 }, "", :intvector{},
:int{1}, :int{1}, "truncated", ".", :bin{ 41 }
}
// all complete with DBCS character
{
"ISO_2022,locale=zh,version=1",
:bin{ 1b2429410e4141 }, "\u4eae", :intvector{ 5 },
:int{1}, :int{1}, "", ".", :bin{""}
}
// ISO-2022-CN-EXT with all subcharsets and shifts and with supplementary code points
{
"ISO-2022-CN-EXT",
:bin{ 1b2429411b242a480e41411b2429457e7c1b4e70341b242b4d1b2429477c341b4f664c2421 },
"\u4eae\u9f82\u56cd\u56cc\U0002a6d6\x30",
:intvector{ 9, 15, 19, 29, 33, 33, 35 },
:int{1}, :int{1}, "", ".", :bin{""}
}
// illegal and unsupported escape sequences
// SS2 without designator: illegal
{
"ISO-2022-CN-EXT",
:bin{ 411b4e2121 }, "\x41", :intvector{ 0 },
:int{1}, :int{1}, "illesc", ".", :bin{ 1b4e }
}
// G3 designator: recognized, but not supported for -CN (only for -CN-EXT)
{
"ISO-2022-CN",
:bin{ 411b242b491b4f2121 }, "\x41", :intvector{ 0 },
:int{1}, :int{1}, "unsuppesc", ".", :bin{ 1b242b49 }
}
// ISO-2022 SBCS
// [U_ENABLE_GENERIC_ISO_2022]
// The _generic_ ISO-2022 converter is disabled starting 2003-dec-03 (ICU 2.8).
// For details see the icu mailing list from 2003-dec-01 and the ucnv2022.c file.
// Language-specific variants of ISO-2022 continue to be available as listed below.
//{
// "ISO_2022",
// :bin{ 0008090a0d1a1c1f203f415c7d7e7f },
// "\x00\x08\t\n\r\x1a\x1c\x1f ?A\\}~\x7f",
// :intvector{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 },
// :int{1}, :int{1}, "", ".", :bin{""}
//}
// DBCS-only extensions
{
"ibm-970",
:bin{ 617eece9b2eb },
"\x61\x7e\u4e00\ub000",
:intvector{ 0, 1, 2, 4 },
:int{1}, :int{1}, "", "?", :bin{""}
}
{
"ibm-971",
:bin{ 617eece9b2eb },
"\ufffd\u4e00\ub000",
:intvector{ 0, 2, 4 },
:int{1}, :int{1}, "", "?", :bin{""}
}
{
"ibm-16684",
:bin{ 430e4395ecc1404042e1 },
"\ufffd\u30C8\u30C8\u309A\u3000\u20ac",
:intvector{ 0, 2, 4, 4, 6, 8 },
:int{1}, :int{0}, "", "?", :bin{""}
}
{
"ibm-1399",
:bin{ 430e4395ecc140400fe1 },
"\uff62\u30C8\u30C8\u309A\u3000\u20ac",
:intvector{ 0, 2, 4, 4, 6, 9 },
:int{1}, :int{0}, "", "?", :bin{""}
}
// extensions
{
"ibm-1390",
:bin{ 430e4395ecc1 },
"\uff63\u30C8\u30C8\u309A",
:intvector{ 0, 2, 4, 4 },
:int{1}, :int{0}, "", "?", :bin{""}
}
{
"ibm-16684",
:bin{ ececec8bec8cec8d4386ecb5ecb6ecb7 },
"\ufffd\u31f6\u31f7\u31f8\u30ab\u304b\u309a\u304d\u309a\u304f\u309a",
:intvector{ 0, 2, 4, 6, 8, 10, 10, 12, 12, 14, 14 },
:int{1}, :int{0}, "", "?", :bin{""}
}
{
"ibm-1390",
:bin{ 43860eececec8bec8cec8d4386ecb5ecb6ecb7ecc10fec },
"\uff63\uff76\ufffd\u31f6\u31f7\u31f8\u30ab\u304b\u309a\u304d\u309a\u304f\u309a\u30C8\u309A\x1a",
:intvector{ 0, 1, 3, 5, 7, 9, 11, 13, 13, 15, 15, 17, 17, 19, 19, 22 },
:int{1}, :int{0}, "", "?", :bin{""}
}
{
"*test3",
:bin{ 00050601020b0701020a01020c },
"\u20ac\x05\x06\x0b\U00101234\U00023456\ufffd",
:intvector{ 0, 1, 2, 3, 6, 6, 7, 7, 10 },
:int{1}, :int{0}, "", "?", :bin{""}
}
// normal conversions
{
"UTF-16LE",
:bin{ 310000d801dc00d902dc320000d8330001dc3400 },
"1\U00010001\U000500022\ufffd3\ufffd4",
:intvector{ 0, 2, 2, 6, 6, 10, 12, 14, 16, 18 },
:int{1}, :int{0}, "", "?", :bin{""}
}
{ "UTF-16LE", :bin{ 00 }, "", :intvector{}, :int{1}, :int{0}, "truncated", ".", :bin{ 00 } }
{ "UTF-16LE", :bin{ 00d800 }, "", :intvector{}, :int{1}, :int{0}, "truncated", ".", :bin{ 00d800 } }
{
"UTF-16BE",
:bin{ 0031d800dc01d900dc020032d8000033dc010034 },
"1\U00010001\U000500022\ufffd3\ufffd4",
:intvector{ 0, 2, 2, 6, 6, 10, 12, 14, 16, 18 },
:int{1}, :int{0}, "", "?", :bin{""}
}
{ "UTF-16BE", :bin{ 00 }, "", :intvector{}, :int{1}, :int{0}, "truncated", ".", :bin{ 00 } }
{ "UTF-16BE", :bin{ d800dc }, "", :intvector{}, :int{1}, :int{0}, "truncated", ".", :bin{ d800dc } }
// e4b8 is a partial sequence
{ "UTF-8", :bin{ 31e4ba8ce4b8 }, "1\u4e8c", :intvector{ 0, 1 }, :int{1}, :int{0}, "truncated", ".", :bin{ e4b8 } }
{ "UTF-8", :bin{ 31e4ba8ce4b8 }, "1\u4e8c\ufffd", :intvector{ 0, 1, 4 }, :int{1}, :int{0}, "", "?", :bin{""} }
// LMBCS with escape callback (1292a0 is unassigned)
{
"LMBCS",
:bin{ 12c9501292a01292a1 },
"\u4e2e%X12%X92%XA0\ue5c4",
:intvector{ 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 6 },
:int{1}, :int{0}, "", "&", :bin{""}
}
// IMAP-mailbox-name with SUB
// a<DEL> a&AB~ a&AB\x0c a&AB- a&AB. a&.
{
"IMAP-mailbox-name",
:bin{ 617f612641427e612641420c612641422d612641422e61262e },
"a\ufffda\ufffda\ufffda\ufffda\ufffda\ufffd",
:intvector{ 0, 1, 2, 4, 7, 9, 12, 14, 17, 19, 22, 23 },
:int{1}, :int{0}, "", "?", :bin{""}
}
// using testdata_test1.cnv
{ "*test1", :bin{ 000506070809 }, "\u20ac\x05\x06\U00101234\ufffd\ufffd", :intvector{ 0, 1, 2, 3, 3, 4, 5 }, :int{1}, :int{0}, "", "", :bin{""} }
// surrogates in CESU-8
{ "CESU-8", :bin{ eda080eda081edb081 }, "\ud800\U00010401", :intvector{ 0, 3, 6 }, :int{1}, :int{0}, "", "", :bin{""} }
// e080 is a partial sequence
{ "UTF-8", :bin{ 31ffe4ba8ce08061 }, "1\ufffd\u4e8c\ufffda", :intvector{ 0, 1, 2, 5, 7 }, :int{0}, :int{0}, "", "", :bin{ e080 } }
// fbbfbfbfbf exceeds U+10ffff
{ "UTF-8", :bin{ 31fbbfbfbfbf61 }, "1\ufffda", :intvector{ 0, 1, 6 }, :int{0}, :int{0}, "", "", :bin{ fbbfbfbfbf } }
// lead byte a2 without trail byte
{ "ibm-1363", :bin{ a2aea2 }, "\u00a1", :intvector{ 0 }, :int{1}, :int{0}, "truncated", ".", :bin{ a2 } }
{ "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "", "?", :bin{""} }
// simple sample, no error handling
{ "UTF-8", :bin{ 61F48FBFBF }, "a\U0010FFFF", :intvector{ 0, 1, 1 }, :int{1}, :int{0}, "", "", :bin{""} }
}
}
// --------------------------------------------------------------------- ***
fromUnicode {
Headers { "charset", "unicode", "bytes", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidUChars" }
Cases {
// test that ISO-2022-JP encodes ASCII as itself
{
"ISO-2022-JP",
"?@ABCDEFG",
:bin{ 3f4041424344454647 },
:intvector{ 0,1,2,3,4,5,6,7,8 },
:int{1}, :int{1}, "", "?", ""
}
// test that ISO-2022-CN encodes ASCII as itself
{
"ISO-2022-CN",
"?@ABCDEFG",
:bin{ 3f4041424344454647 },
:intvector{ 0,1,2,3,4,5,6,7,8 },
:int{1}, :int{1}, "", "?", ""
}
// moved from cintltst /tsconv/nccbtst/TestSkipCallBack
{
"iso-2022-jp",
"\u3000\xe9\u3001",
:bin{ 1b2442212121221b2842 },
:intvector{ 0,0,0,0,0,2,2,2,2,2 },
:int{1}, :int{1}, "", "0", ""
}
// moved from cintltst /tsconv/nccbtst/TestSubCallBack
{
"iso-2022-jp",
"A\xe9B\xe9\u3000",
:bin{ 411a421a1b244221211b2842 },
:intvector{ 0,1,2,3,4,4,4,4,4,4,4,4 },
:int{1}, :int{1}, "", "?", ""
}
// moved from cintltst /tsconv/nccbtst/TestSubWithValueCallBack
{
"iso-2022-jp",
"A\xe9B\xe9\u3000",
:bin{ 41255530304539422555303045391b244221211b2842 },
:intvector{ 0,1,1,1,1,1,1,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4 },
:int{1}, :int{1}, "", "&", ""
}
{
"iso-2022-cn",
"\u4e00\u3712\u4e01",
:bin{ 1b2429410e523b0f2555333731320e36210f },
:intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,1,2,2,2,2 },
:int{1}, :int{1}, "", "&", ""
}
{
"iso-2022-cn",
"A\u3712\u4e00",
:bin{ 412555333731321b2429410e523b0f },
:intvector{ 0,1,1,1,1,1,1,2,2,2,2,2,2,2,2 },
:int{1}, :int{1}, "", "&", ""
}
{
"iso-2022-cn",
"\u3000\u3712\u3001",
:bin{ 1b2429410e21210f2555333731320e21220f },
:intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,1,2,2,2,2 },
:int{1}, :int{1}, "", "&", ""
}
// moved from cintltst /tsconv/nucnvtst/TestJIS
{
"JIS",
"\uFF81\uFF82\u30EC\u30ED\u30EE\u30EF\uFF93\uFF94\uFF95\uFF96\uFF97\uFF98",
:bin{ 1b244225412544256c256d256e256F25622564256625682569256a1b2842 },
:intvector{ 0,0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,11,11,11 },
:int{1}, :int{1}, "", "?", ""
}
{
"JIS7",
"\uFF81\uFF82\u30EC\u30ED\u30EE\u30EF\uFF93\uFF94\uFF95\uFF96\uFF97\uFF98",
:bin{ 0e41420f1b2442256c256d256e256F0e5354555657580f1b2842 },
:intvector{ 0,0,1,2,2,2,2,2,2,3,3,4,4,5,5,6,6,7,8,9,10,11,11,11,11,11 },
:int{1}, :int{1}, "", "?", ""
}
{
"JIS8",
"\uFF81\uFF82\u30EC\u30ED\u30EE\u30EF\uFF93\uFF94\uFF95\uFF96\uFF97\uFF98",
:bin{ C1C21b2442256c256d256e256F1b284AD3D4D5D6D7D81b2842 },
:intvector{ 0,1,2,2,2,2,2,3,3,4,4,5,5,6,6,6,6,7,8,9,10,11,11,11,11 },
:int{1}, :int{1}, "", "?", ""
}
// moved from cintltst /tsconv/ncnvtst/TestErrorBehaviour
{
"iso-2022-jp",
"\u3000\x50\udc01\u3001",
:bin{ 1B244221211B2842501A1B24422122 },
:intvector{ 0,0,0,0,0,1,1,1,1,2,3,3,3,3,3 },
:int{0}, :int{1}, "", "?", "\udc01"
}
{
"iso-2022-jp",
"\u3000\x50\udc01\u3001",
:bin{ 1B244221211B2842501A1B244221221b2842 },
:intvector{ 0,0,0,0,0,1,1,1,1,2,3,3,3,3,3,3,3,3 },
:int{1}, :int{1}, "", "?", ""
}
{
"iso-2022-kr",
"\x61\u4e00\udc01\u4e00",
:bin{ 1b242943610e6c690f1a0e6c69 },
:intvector{ -1,-1,-1,-1,0,1,1,1,2,2,3,3,3 },
:int{0}, :int{1}, "", "?", "\udc01"
}
{
"iso-2022-kr",
"\x61\u4e00\udc01\u4e00",
:bin{ 1b242943610e6c690f1a0e6c690f },
:intvector{ -1,-1,-1,-1,0,1,1,1,2,2,3,3,3,3 },
:int{1}, :int{1}, "", "?", ""
}
// ISO-2022-KR
{
"ibm-25546",
"AB\uc88b\U00050005\uacccC",
:bin{ 1b24294341420e41410f7b552b35303030357d0e306a0f43 },
:intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,3,3,3,3,3,3,3,3,5,5,5,6,6 },
:int{1}, :int{1}, "", "&U", ""
}
{
"ibm-25546",
"AB\uc88b\U00050005\uacccC",
:bin{ 1b24294341420e41410f1a0e306a0f43 },
:intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,5,6,6 },
:int{1}, :int{1}, "", "?\x00\x1a", ""
}
{
"ibm-25546",
"AB\uc88b\U00050005\uacccC",
:bin{ 1b24294341420e41412f7e306a0f43 },
:intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,6,6 },
:int{1}, :int{1}, "", "?", ""
}
{
"ibm-25546",
"AB\uc88b\U00050005\uaccc",
:bin{ 1b24294341420e41412f7e306a0f },
:intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,5 },
:int{1}, :int{1}, "", "?", ""
}
{
"ISO-2022-KR",
"AB\uc88b\U00050005\uacccC",
:bin{ 1b24294341420e41410f7b552b35303030357d0e306a0f43 },
:intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,3,3,3,3,3,3,3,3,5,5,5,6,6 },
:int{1}, :int{1}, "", "&U", ""
}
{
"ISO-2022-KR",
"AB\uc88b\U00050005\uacccC",
:bin{ 1b24294341420e41410f1a0e306a0f43 },
:intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,5,6,6 },
:int{1}, :int{1}, "", "?", ""
}
{
"ISO-2022-KR",
"AB\uc88b\U00050005\uacccC",
:bin{ 1b24294341420e41412f7e306a0f43 },
:intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,6,6 },
:int{1}, :int{1}, "", "?\x00\x2f\x7e", ""
}
{
"ISO-2022-KR",
"AB\uc88b\U00050005\uaccc",
:bin{ 1b24294341420e41412f7e306a0f },
:intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,5 },
:int{1}, :int{1}, "", "?\x00\x2f\x7e", ""
}
// ISO-2022-JP-2 with G2 designator & SS2 shift
{
"ISO-2022-JP-2",
"CF\u758f\u038f\u7591",
:bin{ 43461b244241411b2e461b4e3f353f1b2842 },
:intvector{ 0,1,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4 },
:int{1}, :int{1}, "", ".", ""
}
// JIS7 with Katakana
{
"JIS7",
"AB\uff81\uff82AB",
:bin{ 41420e41420f4142 },
:intvector{ 0,1,2,2,3,4,4,5 },
:int{1}, :int{1}, "", ".", ""
}
// JIS7 with shift to ASCII at the very end
{
"JIS7",
"AB\uff81\uff82",
:bin{ 41420e41420f },
:intvector{ 0,1,2,2,3,3 },
:int{1}, :int{1}, "", ".", ""
}
// JIS8 with Katakana
{
"JIS8",
"A\uff81\\\xa5\uff82B",
:bin{ 41c15c1b284a5cc2421b2842 },
:intvector{ 0,1,2,3,3,3,3,4,5,5,5,5 },
:int{1}, :int{1}, "", ".", ""
}
// ISO-2022-CN-EXT with all subcharsets and shifts and with supplementary code points
{
"ISO-2022-CN-EXT",
"\u4eae\u9f82\u56cd\u56cc\U0002a6d6\x30",
:bin{ 1b2429410e41411b2429457e7c1b242a481b4e70341b2429477c341b242b4d1b4f664c0f30 },
:intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4,6,6 },
:int{1}, :int{1}, "", ".", ""
}
// ISO-2022-CN-EXT with shift to ASCII at the very end
{
"ISO-2022-CN-EXT",
"\u4eae\u9f82\u56cd\u56cc\U0002a6d6",
:bin{ 1b2429410e41411b2429457e7c1b242a481b4e70341b2429477c341b242b4d1b4f664c0f },
:intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4 },
:int{1}, :int{1}, "", ".", ""
}
// ISO-2022-CN-EXT without flush so do not shift to ASCII at the very end
{
"ISO-2022-CN-EXT",
"\u4eae\u9f82\u56cd\u56cc\U0002a6d6",
:bin{ 1b2429410e41411b2429457e7c1b242a481b4e70341b2429477c341b242b4d1b4f664c },
:intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4 },
:int{0}, :int{1}, "", ".", ""
}
// windows-936 vs. ibm-1386
{
"ibm-1386",
"\x1a\u20ac\u5555\x80\x81\U00055555",
:bin{ 7fa2e3dffb7f7fa1a1 },
:intvector{ 0, 1, 1, 2, 2, 3, 4, 5, 5 },
:int{1}, :int{1}, "", "?", ""
}
{
"windows-936",
"\x1a\u20ac\u5555\x80\x81\U00055555",
:bin{ 1a80dffb3f3f3f },
:intvector{ 0, 1, 2, 2, 3, 4, 5 },
:int{1}, :int{1}, "", "?", ""
}
// verify that if a conversion table does not have any mapping for U+0000,
// then there will not even be a phantom fallback to 00
{
"ibm-971",
"\x00",
:bin{ affe },
:intvector{ 0, 0 },
:int{1}, :int{1}, "", "?", ""
}
{
"*test4",
"\x00",
:bin{ ff },
:intvector{ 0 },
:int{1}, :int{1}, "", "?", ""
}
// extension in testdata
{
"*test4x",
"\u20ac\x09",
:bin{ 0009 },
:intvector{ 0, 1 },
:int{1}, :int{1}, "", "?", ""
}
// DBCS-only extensions
{
"ibm-970",
"\x61\uffa1\u2015\ub000",
:bin{ 611aa1aab2eb },
:intvector{ 0, 1, 2, 2, 3, 3 },
:int{1}, :int{1}, "", "?", ""
}
{
"ibm-971",
"\x61\uffa1\u2015\ub000",
:bin{ affeaffeaffeb2eb },
:intvector{ 0, 0, 1, 1, 2, 2, 3, 3 },
:int{1}, :int{1}, "", "?", ""
}
{
"ibm-1390,swaplfnl",
"\uff63\u30C8\u30C8\u309A\u3000\x41\u20ac\x0a",
:bin{ 430e4395ecc140400fc1e115 },
:intvector{ 0, 1, 1, 1, 2, 2, 4, 4, 5, 5, 6, 7 },
:int{1}, :int{0}, "", "?", ""
}
{
"ibm-16684",
"\uff63\u30C8\u30C8\u309A\u3000\x41\u20ac\x0a",
:bin{ fefe4395ecc14040fefe42e1fefe },
:intvector{ 0, 0, 1, 1, 2, 2, 4, 4, 5, 5, 6, 6, 7, 7 },
:int{1}, :int{0}, "", "?", ""
}
{
"ibm-1399",
"\uff63\u30C8\u30C8\u309A\u3000\x41\u20ac\x0a",
:bin{ 440e4395ecc140400fc1e125 },
:intvector{ 0, 1, 1, 1, 2, 2, 4, 4, 5, 5, 6, 7 },
:int{1}, :int{0}, "", "?", ""
}
// <subchar1> from |2 mappings
{
"ibm-1390",
"\x0e\x0f\u0901\U00050000\uffe8\uffee",
:bin{ 3f3f0efefefefe0f3f3f },
:intvector{ 0, 1, 2, 2, 2, 3, 3, 5, 5, 6 },
:int{1}, :int{1}, "", "?", ""
}
// <subchar1> from |2 mappings, and also contains a fallback to 00
{
"*test4",
"\u20ac\u20ad\U00050005\U00023456\U0010ffff\x30",
:bin{ 0000e10102030affff },
:intvector{ 0, 1, 2, 4, 4, 4, 4, 6, 8 },
:int{1}, :int{1}, "", "?", ""
}
// setting a <subchar> resets the <subchar1>
{
"*test4",
"\u20ac\u20ad\U00050005\U00023456\U0010ffff\x30",
:bin{ 00000102030f0102030a0102030f0102030f },
:intvector{ 0, 1, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8, 8, 8, 8 },
:int{1}, :int{1}, "", "?\x00\x01\x02\x03\x0f", ""
}
// fallback to 00 with old single-byte data structure
{
"*test1",
"\u20ac\u20ad\U00101234\U00050000",
:bin{ 000007ff },
:intvector{ 0, 1, 2, 4 },
:int{1}, :int{1}, "", "?", ""
}
// extensions
{
"ibm-1390",
"\u025a\u025a\u0300\u025a\u0301\u025a\u0302\uffe8\U0002a0f9",
:bin{ 0ed896eccaeccbd896ea530f3f0eb7c20f },
:intvector{ 0, 0, 0, 1, 1, 3, 3, 5, 5, 6, 6, 7, 7, 8, 8, 8, 8 },
:int{1}, :int{0}, "", "?", ""
}
{
"*test3",
"\xc4\xc4\xc4\U00101234\xc4\xc4\U00101234\x05",
:bin{ ffffff070501020c },
:intvector{ 0, 1, 2, 3, 5, 5, 5, 5 },
:int{1}, :int{0}, "", "?", ""
}
{
"*test3",
"\U00101234\U00101234\U00050005\U00101234\U00050005\U00060006",
:bin{ 07070001020e05070001020f09 },
:intvector{ 0, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6 },
:int{1}, :int{0}, "", "?", ""
}
// normal conversions
{
"UTF-16LE",
"1\U00010001\U000500022\ud8003\udc014",
:bin{ 310000d801dc00d902dc3200fdff3300fdff3400 },
:intvector{ 0, 0, 1, 1, 1, 1, 3, 3, 3, 3, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9 },
:int{1}, :int{0}, "", "?", ""
}
{ "UTF-16LE", "\ud800", :bin{""}, :intvector{}, :int{1}, :int{0}, "truncated", ".", "\ud800" }
{
"UTF-16BE",
"1\U00010001\U000500022\ud8003\udc014",
:bin{ 0031d800dc01d900dc020032fffd0033fffd0034 },
:intvector{ 0, 0, 1, 1, 1, 1, 3, 3, 3, 3, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9 },
:int{1}, :int{0}, "", "?", ""
}
{ "UTF-16BE", "\ud800", :bin{""}, :intvector{}, :int{1}, :int{0}, "truncated", ".", "\ud800" }
// escape callback
{
"ISCII",
"A\u0901\U00023456\u0902B\U00023456C",
:bin{ 41ef42a1255544383444255544433536a24225554438344425554443353643 },
:intvector{
0,
1,1,1,
2,2,2,2,2,2,
2,2,2,2,2,2,
4,
5,
6,6,6,6,6,6,
6,6,6,6,6,6,
8
},
:int{1}, :int{0}, "", "&", ""
}
// escape callback (hex)
{
"iso-2022-jp",
"\u3000\U00023456\u3001\U00023456B\u901c",
:bin{ 1b244221211b284226237832333435363b1b244221221b284226237832333435363b42262378393031433b },
:intvector{
0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,
3,3,3,3,3,
4,4,4,4,4,4,4,4,4,4,4,4,
6,
7,7,7,7,7,7,7,7
},
:int{1}, :int{0}, "", "&X", ""
}
// sub callback
{
"gb18030",
"$\x7f\x80\u01f9\u20ac\u4e00\u9fa6\uffff\U00010000\U0010ffff",
:bin{ 247f81308130a8bfa2e3d2bb82358f338431a43990308130e3329a35 },
:intvector{ 0, 1, 2, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 10, 10, 10, 10 },
:int{1}, :int{0}, "", "?", ""
}
// skip callback
{ "ibm-930", "\u6D63\u6D64\u6D65\u6D66", :bin{ 0e5d5f5d63466b0f }, :intvector{ 0, 0, 0, 1, 1, 3, 3, 3 }, :int{1}, :int{0}, "", "0", "" }
{ "ibm-930", "\u6D63\u6D64\ud89a\u6D66", :bin{ 0e5d5f5d63466b0f }, :intvector{ 0, 0, 0, 1, 1, 3, 3, 3 }, :int{1}, :int{0}, "", "0", "" }
{ "ibm-930", "\u6D63\u6D64\ud89a\u6D66", :bin{ 0e5d5f5d63 }, :intvector{ 0, 0, 0, 1, 1 }, :int{1}, :int{0}, "illegal", "0i", "\ud89a" }
// sub callback for supplementary code point
{ "LATIN1", "1\U000104012", :bin{ 311a32 }, :intvector{ 0, 1, 3 }, :int{1}, :int{0}, "", "", "" }
{ "ibm-920", "1\U000104012", :bin{ 311a32 }, :intvector{ 0, 1, 3 }, :int{1}, :int{0}, "", "", "" }
// sub callback with AA as subchar
{ "ibm-920", "1\U000104012", :bin{ 31AA32 }, :intvector{ 0, 1, 3 }, :int{1}, :int{0}, "", "?\x00\xAA", "" }
// same but not flushing
{ "LATIN1", "1\U000104012", :bin{ 311a32 }, :intvector{ 0, 1, 3 }, :int{0}, :int{0}, "", "", "\U00010401" }
{ "ibm-920", "1\U000104012", :bin{ 311a32 }, :intvector{ 0, 1, 3 }, :int{0}, :int{0}, "", "", "\U00010401" }
// simple sample, no error handling
{ "UTF-8", "a\U0010FFFF", :bin{ 61F48FBFBF }, :intvector{ 0, 1, 1, 1, 1 }, :int{1}, :int{0}, "", "", "" }
}
}
getUnicodeSet {
// charset - will be opened, and ucnv_getUnicodeSet() called on it
// map - set of code points and strings that must be in the returned set
// mapnot - set of code points and strings that must *not* be in the returned set
// which - numeric UConverterUnicodeSet value
Headers { "charset", "map", "mapnot", "which" }
Cases {
// ISO-2022-KR
{
"ISO-2022-KR",
"[\x00-\x7f\xa1\xa4\xfe\u0111\u4e00\u4e01\uac00-\uac02\uffe6]",
"[\x80-\xa0\xa3\xa5\xff-\u0110\uac03\uffe7-\U0010ffff]",
:int{0}
}
// versions of ISO-2022-JP
{
"ISO-2022-JP",
"[\x00-\x7f\u0391-\u03a1\uff61-\uff9f\u4e00\u4e01\uffe5]",
"[\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\uffe6-\U0010ffff]",
:int{0}
}
{
"ISO-2022-JP-2",
"[\x00-\u0113\u0385-\u038a\u0390-\u03a1\uff61-\uff9f\u4e00-\u4e05\uffe6]",
"[\uffe7-\U0010ffff]",
:int{0}
}
// versions of ISO-2022-CN
{
"ISO-2022-CN",
"[\x00-\x7f\u4e00\u4e01\u9f98\ufe6b]",
"[\u4e29\uffe6-\U0010ffff]",
:int{0}
}
{
"ISO-2022-CN-EXT",
"[\x00-\x7f\u4e00-\u4e05\u9f98\ufe6b\u4e28-\u4e2b\U00020000\U00020003-\U00020005\U00029664]",
"[\U00020001\U00020002\U0002a6d7-\U0010ffff]",
:int{0}
}
// DBCS-only
{
"ibm-971",
"[\xa1\xa4\uac01\ub000]",
"[\x00-\x9f\u2015]",
:int{0}
}
{
"ibm-16684",
"[\xa0\xa1\xa4\xa6-\xab\xad-\u017f\u0254\u309b-\u30ff\u4e00-\u4e05\U00023d00\U000243bc\U0002a6b2"
"{\u0254\u0300}{\u0254\u0301}{\u304b\u309a}{\u30ad\u309a}{\u30af\u309a}]",
"[\x00-\x9f\xa2\xa3\xa5\xac\u0200-\u024f\U00010000-\U0001ffff\U0002a61b-\U0002a6b1]",
:int{0}
}
// extensions
{
"ibm-1390",
"[\x00-\x0d\x10-\u017f\u0254\u309b-\u30ff\u4e00-\u4e05\U00023d00\U000243bc\U0002a6b2"
"{\u0254\u0300}{\u0254\u0301}{\u304b\u309a}{\u30ad\u309a}{\u30af\u309a}]",
"[\x0e\x0f\u0200-\u024f\U00010000-\U0001ffff\U0002a61b-\U0002a6b1]",
:int{0}
}
{
"*test3",
"[\x05\x0b\xc0\u20ac\U00023456\U00101234"
"{\U00101234\U00050005\U00060006}{\U00101234\U00050005}{\U00101234\U00060006}{\xc4\xc4\U00101234\x05}]",
"[\x06\x0e\U00034567\U000febcd{\U00101234\U00070007}]",
:int{0}
}
}
}
}
}
--- NEW FILE: nfs4_cis_prep.txt ---
###################
# Copyright (C) 2003, International Business Machines
# Corporation and others. All Rights Reserved.
###################
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
###################
# This table contains code points from Table A.1 from RFC 3454
0221; ; UNASSIGNED
0234..024F; ; UNASSIGNED
02AE..02AF; ; UNASSIGNED
02EF..02FF; ; UNASSIGNED
0350..035F; ; UNASSIGNED
0370..0373; ; UNASSIGNED
0376..0379; ; UNASSIGNED
037B..037D; ; UNASSIGNED
[...1906 lines suppressed...]
202B; ; PROHIBITED
202C; ; PROHIBITED
202D; ; PROHIBITED
202E; ; PROHIBITED
206A; ; PROHIBITED
206B; ; PROHIBITED
206C; ; PROHIBITED
206D; ; PROHIBITED
206E; ; PROHIBITED
206F; ; PROHIBITED
# Total code points 15
# code points from Table C.9
E0001; ; PROHIBITED
E0020..E007F; ; PROHIBITED
# Total code points 82
--- NEW FILE: nfs4_cs_prep_ci.txt ---
###################
# Copyright (C) 2003, International Business Machines
# Corporation and others. All Rights Reserved.
###################
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
###################
# This table contains code points from Table A.1 from RFC 3454
0221; ; UNASSIGNED
0234..024F; ; UNASSIGNED
02AE..02AF; ; UNASSIGNED
02EF..02FF; ; UNASSIGNED
0350..035F; ; UNASSIGNED
0370..0373; ; UNASSIGNED
0376..0379; ; UNASSIGNED
037B..037D; ; UNASSIGNED
[...1863 lines suppressed...]
202B; ; PROHIBITED
202C; ; PROHIBITED
202D; ; PROHIBITED
202E; ; PROHIBITED
206A; ; PROHIBITED
206B; ; PROHIBITED
206C; ; PROHIBITED
206D; ; PROHIBITED
206E; ; PROHIBITED
206F; ; PROHIBITED
# Total code points 15
# code points from Table C.9
E0001; ; PROHIBITED
E0020..E007F; ; PROHIBITED
# Total code points 82
--- NEW FILE: nfs4_cs_prep_cs.txt ---
###################
# Copyright (C) 2003, International Business Machines
# Corporation and others. All Rights Reserved.
###################
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
###################
# This table contains code points from Table A.1 from RFC 3454
0221; ; UNASSIGNED
0234..024F; ; UNASSIGNED
02AE..02AF; ; UNASSIGNED
02EF..02FF; ; UNASSIGNED
0350..035F; ; UNASSIGNED
0370..0373; ; UNASSIGNED
0376..0379; ; UNASSIGNED
037B..037D; ; UNASSIGNED
037F..0383; ; UNASSIGNED
038B; ; UNASSIGNED
038D; ; UNASSIGNED
03A2; ; UNASSIGNED
03CF; ; UNASSIGNED
03F7..03FF; ; UNASSIGNED
0487; ; UNASSIGNED
04CF; ; UNASSIGNED
04F6..04F7; ; UNASSIGNED
04FA..04FF; ; UNASSIGNED
0510..0530; ; UNASSIGNED
0557..0558; ; UNASSIGNED
0560; ; UNASSIGNED
0588; ; UNASSIGNED
058B..0590; ; UNASSIGNED
05A2; ; UNASSIGNED
05BA; ; UNASSIGNED
05C5..05CF; ; UNASSIGNED
05EB..05EF; ; UNASSIGNED
05F5..060B; ; UNASSIGNED
060D..061A; ; UNASSIGNED
061C..061E; ; UNASSIGNED
0620; ; UNASSIGNED
063B..063F; ; UNASSIGNED
0656..065F; ; UNASSIGNED
06EE..06EF; ; UNASSIGNED
06FF; ; UNASSIGNED
070E; ; UNASSIGNED
072D..072F; ; UNASSIGNED
074B..077F; ; UNASSIGNED
07B2..0900; ; UNASSIGNED
0904; ; UNASSIGNED
093A..093B; ; UNASSIGNED
094E..094F; ; UNASSIGNED
0955..0957; ; UNASSIGNED
0971..0980; ; UNASSIGNED
0984; ; UNASSIGNED
098D..098E; ; UNASSIGNED
0991..0992; ; UNASSIGNED
09A9; ; UNASSIGNED
09B1; ; UNASSIGNED
09B3..09B5; ; UNASSIGNED
09BA..09BB; ; UNASSIGNED
09BD; ; UNASSIGNED
09C5..09C6; ; UNASSIGNED
09C9..09CA; ; UNASSIGNED
09CE..09D6; ; UNASSIGNED
09D8..09DB; ; UNASSIGNED
09DE; ; UNASSIGNED
09E4..09E5; ; UNASSIGNED
09FB..0A01; ; UNASSIGNED
0A03..0A04; ; UNASSIGNED
0A0B..0A0E; ; UNASSIGNED
0A11..0A12; ; UNASSIGNED
0A29; ; UNASSIGNED
0A31; ; UNASSIGNED
0A34; ; UNASSIGNED
0A37; ; UNASSIGNED
0A3A..0A3B; ; UNASSIGNED
0A3D; ; UNASSIGNED
0A43..0A46; ; UNASSIGNED
0A49..0A4A; ; UNASSIGNED
0A4E..0A58; ; UNASSIGNED
0A5D; ; UNASSIGNED
0A5F..0A65; ; UNASSIGNED
0A75..0A80; ; UNASSIGNED
0A84; ; UNASSIGNED
0A8C; ; UNASSIGNED
0A8E; ; UNASSIGNED
0A92; ; UNASSIGNED
0AA9; ; UNASSIGNED
0AB1; ; UNASSIGNED
0AB4; ; UNASSIGNED
0ABA..0ABB; ; UNASSIGNED
0AC6; ; UNASSIGNED
0ACA; ; UNASSIGNED
0ACE..0ACF; ; UNASSIGNED
0AD1..0ADF; ; UNASSIGNED
0AE1..0AE5; ; UNASSIGNED
0AF0..0B00; ; UNASSIGNED
0B04; ; UNASSIGNED
0B0D..0B0E; ; UNASSIGNED
0B11..0B12; ; UNASSIGNED
0B29; ; UNASSIGNED
0B31; ; UNASSIGNED
0B34..0B35; ; UNASSIGNED
0B3A..0B3B; ; UNASSIGNED
0B44..0B46; ; UNASSIGNED
0B49..0B4A; ; UNASSIGNED
0B4E..0B55; ; UNASSIGNED
0B58..0B5B; ; UNASSIGNED
0B5E; ; UNASSIGNED
0B62..0B65; ; UNASSIGNED
0B71..0B81; ; UNASSIGNED
0B84; ; UNASSIGNED
0B8B..0B8D; ; UNASSIGNED
0B91; ; UNASSIGNED
0B96..0B98; ; UNASSIGNED
0B9B; ; UNASSIGNED
0B9D; ; UNASSIGNED
0BA0..0BA2; ; UNASSIGNED
0BA5..0BA7; ; UNASSIGNED
0BAB..0BAD; ; UNASSIGNED
0BB6; ; UNASSIGNED
0BBA..0BBD; ; UNASSIGNED
0BC3..0BC5; ; UNASSIGNED
0BC9; ; UNASSIGNED
0BCE..0BD6; ; UNASSIGNED
0BD8..0BE6; ; UNASSIGNED
0BF3..0C00; ; UNASSIGNED
0C04; ; UNASSIGNED
0C0D; ; UNASSIGNED
0C11; ; UNASSIGNED
0C29; ; UNASSIGNED
0C34; ; UNASSIGNED
0C3A..0C3D; ; UNASSIGNED
0C45; ; UNASSIGNED
0C49; ; UNASSIGNED
0C4E..0C54; ; UNASSIGNED
0C57..0C5F; ; UNASSIGNED
0C62..0C65; ; UNASSIGNED
0C70..0C81; ; UNASSIGNED
0C84; ; UNASSIGNED
0C8D; ; UNASSIGNED
0C91; ; UNASSIGNED
0CA9; ; UNASSIGNED
0CB4; ; UNASSIGNED
0CBA..0CBD; ; UNASSIGNED
0CC5; ; UNASSIGNED
0CC9; ; UNASSIGNED
0CCE..0CD4; ; UNASSIGNED
0CD7..0CDD; ; UNASSIGNED
0CDF; ; UNASSIGNED
0CE2..0CE5; ; UNASSIGNED
0CF0..0D01; ; UNASSIGNED
0D04; ; UNASSIGNED
0D0D; ; UNASSIGNED
0D11; ; UNASSIGNED
0D29; ; UNASSIGNED
0D3A..0D3D; ; UNASSIGNED
0D44..0D45; ; UNASSIGNED
0D49; ; UNASSIGNED
0D4E..0D56; ; UNASSIGNED
0D58..0D5F; ; UNASSIGNED
0D62..0D65; ; UNASSIGNED
0D70..0D81; ; UNASSIGNED
0D84; ; UNASSIGNED
0D97..0D99; ; UNASSIGNED
0DB2; ; UNASSIGNED
0DBC; ; UNASSIGNED
0DBE..0DBF; ; UNASSIGNED
0DC7..0DC9; ; UNASSIGNED
0DCB..0DCE; ; UNASSIGNED
0DD5; ; UNASSIGNED
0DD7; ; UNASSIGNED
0DE0..0DF1; ; UNASSIGNED
0DF5..0E00; ; UNASSIGNED
0E3B..0E3E; ; UNASSIGNED
0E5C..0E80; ; UNASSIGNED
0E83; ; UNASSIGNED
0E85..0E86; ; UNASSIGNED
0E89; ; UNASSIGNED
0E8B..0E8C; ; UNASSIGNED
0E8E..0E93; ; UNASSIGNED
0E98; ; UNASSIGNED
0EA0; ; UNASSIGNED
0EA4; ; UNASSIGNED
0EA6; ; UNASSIGNED
0EA8..0EA9; ; UNASSIGNED
0EAC; ; UNASSIGNED
0EBA; ; UNASSIGNED
0EBE..0EBF; ; UNASSIGNED
0EC5; ; UNASSIGNED
0EC7; ; UNASSIGNED
0ECE..0ECF; ; UNASSIGNED
0EDA..0EDB; ; UNASSIGNED
0EDE..0EFF; ; UNASSIGNED
0F48; ; UNASSIGNED
0F6B..0F70; ; UNASSIGNED
0F8C..0F8F; ; UNASSIGNED
0F98; ; UNASSIGNED
0FBD; ; UNASSIGNED
0FCD..0FCE; ; UNASSIGNED
0FD0..0FFF; ; UNASSIGNED
1022; ; UNASSIGNED
1028; ; UNASSIGNED
102B; ; UNASSIGNED
1033..1035; ; UNASSIGNED
103A..103F; ; UNASSIGNED
105A..109F; ; UNASSIGNED
10C6..10CF; ; UNASSIGNED
10F9..10FA; ; UNASSIGNED
10FC..10FF; ; UNASSIGNED
115A..115E; ; UNASSIGNED
11A3..11A7; ; UNASSIGNED
11FA..11FF; ; UNASSIGNED
1207; ; UNASSIGNED
1247; ; UNASSIGNED
1249; ; UNASSIGNED
124E..124F; ; UNASSIGNED
1257; ; UNASSIGNED
1259; ; UNASSIGNED
125E..125F; ; UNASSIGNED
1287; ; UNASSIGNED
1289; ; UNASSIGNED
128E..128F; ; UNASSIGNED
12AF; ; UNASSIGNED
12B1; ; UNASSIGNED
12B6..12B7; ; UNASSIGNED
12BF; ; UNASSIGNED
12C1; ; UNASSIGNED
12C6..12C7; ; UNASSIGNED
12CF; ; UNASSIGNED
12D7; ; UNASSIGNED
12EF; ; UNASSIGNED
130F; ; UNASSIGNED
1311; ; UNASSIGNED
1316..1317; ; UNASSIGNED
131F; ; UNASSIGNED
1347; ; UNASSIGNED
135B..1360; ; UNASSIGNED
137D..139F; ; UNASSIGNED
13F5..1400; ; UNASSIGNED
1677..167F; ; UNASSIGNED
169D..169F; ; UNASSIGNED
16F1..16FF; ; UNASSIGNED
170D; ; UNASSIGNED
1715..171F; ; UNASSIGNED
1737..173F; ; UNASSIGNED
1754..175F; ; UNASSIGNED
176D; ; UNASSIGNED
1771; ; UNASSIGNED
1774..177F; ; UNASSIGNED
17DD..17DF; ; UNASSIGNED
17EA..17FF; ; UNASSIGNED
180F; ; UNASSIGNED
181A..181F; ; UNASSIGNED
1878..187F; ; UNASSIGNED
18AA..1DFF; ; UNASSIGNED
1E9C..1E9F; ; UNASSIGNED
1EFA..1EFF; ; UNASSIGNED
1F16..1F17; ; UNASSIGNED
1F1E..1F1F; ; UNASSIGNED
1F46..1F47; ; UNASSIGNED
1F4E..1F4F; ; UNASSIGNED
1F58; ; UNASSIGNED
1F5A; ; UNASSIGNED
1F5C; ; UNASSIGNED
1F5E; ; UNASSIGNED
1F7E..1F7F; ; UNASSIGNED
1FB5; ; UNASSIGNED
1FC5; ; UNASSIGNED
1FD4..1FD5; ; UNASSIGNED
1FDC; ; UNASSIGNED
1FF0..1FF1; ; UNASSIGNED
1FF5; ; UNASSIGNED
1FFF; ; UNASSIGNED
2053..2056; ; UNASSIGNED
2058..205E; ; UNASSIGNED
2064..2069; ; UNASSIGNED
2072..2073; ; UNASSIGNED
208F..209F; ; UNASSIGNED
20B2..20CF; ; UNASSIGNED
20EB..20FF; ; UNASSIGNED
213B..213C; ; UNASSIGNED
214C..2152; ; UNASSIGNED
2184..218F; ; UNASSIGNED
23CF..23FF; ; UNASSIGNED
2427..243F; ; UNASSIGNED
244B..245F; ; UNASSIGNED
24FF; ; UNASSIGNED
2614..2615; ; UNASSIGNED
2618; ; UNASSIGNED
267E..267F; ; UNASSIGNED
268A..2700; ; UNASSIGNED
2705; ; UNASSIGNED
270A..270B; ; UNASSIGNED
2728; ; UNASSIGNED
274C; ; UNASSIGNED
274E; ; UNASSIGNED
2753..2755; ; UNASSIGNED
2757; ; UNASSIGNED
275F..2760; ; UNASSIGNED
2795..2797; ; UNASSIGNED
27B0; ; UNASSIGNED
27BF..27CF; ; UNASSIGNED
27EC..27EF; ; UNASSIGNED
2B00..2E7F; ; UNASSIGNED
2E9A; ; UNASSIGNED
2EF4..2EFF; ; UNASSIGNED
2FD6..2FEF; ; UNASSIGNED
2FFC..2FFF; ; UNASSIGNED
3040; ; UNASSIGNED
3097..3098; ; UNASSIGNED
3100..3104; ; UNASSIGNED
312D..3130; ; UNASSIGNED
318F; ; UNASSIGNED
31B8..31EF; ; UNASSIGNED
321D..321F; ; UNASSIGNED
3244..3250; ; UNASSIGNED
327C..327E; ; UNASSIGNED
32CC..32CF; ; UNASSIGNED
32FF; ; UNASSIGNED
3377..337A; ; UNASSIGNED
33DE..33DF; ; UNASSIGNED
33FF; ; UNASSIGNED
4DB6..4DFF; ; UNASSIGNED
9FA6..9FFF; ; UNASSIGNED
A48D..A48F; ; UNASSIGNED
A4C7..ABFF; ; UNASSIGNED
D7A4..D7FF; ; UNASSIGNED
FA2E..FA2F; ; UNASSIGNED
FA6B..FAFF; ; UNASSIGNED
FB07..FB12; ; UNASSIGNED
FB18..FB1C; ; UNASSIGNED
FB37; ; UNASSIGNED
FB3D; ; UNASSIGNED
FB3F; ; UNASSIGNED
FB42; ; UNASSIGNED
FB45; ; UNASSIGNED
FBB2..FBD2; ; UNASSIGNED
FD40..FD4F; ; UNASSIGNED
FD90..FD91; ; UNASSIGNED
FDC8..FDCF; ; UNASSIGNED
FDFD..FDFF; ; UNASSIGNED
FE10..FE1F; ; UNASSIGNED
FE24..FE2F; ; UNASSIGNED
FE47..FE48; ; UNASSIGNED
FE53; ; UNASSIGNED
FE67; ; UNASSIGNED
FE6C..FE6F; ; UNASSIGNED
FE75; ; UNASSIGNED
FEFD..FEFE; ; UNASSIGNED
FF00; ; UNASSIGNED
FFBF..FFC1; ; UNASSIGNED
FFC8..FFC9; ; UNASSIGNED
FFD0..FFD1; ; UNASSIGNED
FFD8..FFD9; ; UNASSIGNED
FFDD..FFDF; ; UNASSIGNED
FFE7; ; UNASSIGNED
FFEF..FFF8; ; UNASSIGNED
10000..102FF; ; UNASSIGNED
1031F; ; UNASSIGNED
10324..1032F; ; UNASSIGNED
1034B..103FF; ; UNASSIGNED
10426..10427; ; UNASSIGNED
1044E..1CFFF; ; UNASSIGNED
1D0F6..1D0FF; ; UNASSIGNED
1D127..1D129; ; UNASSIGNED
1D1DE..1D3FF; ; UNASSIGNED
1D455; ; UNASSIGNED
1D49D; ; UNASSIGNED
1D4A0..1D4A1; ; UNASSIGNED
1D4A3..1D4A4; ; UNASSIGNED
1D4A7..1D4A8; ; UNASSIGNED
1D4AD; ; UNASSIGNED
1D4BA; ; UNASSIGNED
1D4BC; ; UNASSIGNED
1D4C1; ; UNASSIGNED
1D4C4; ; UNASSIGNED
1D506; ; UNASSIGNED
1D50B..1D50C; ; UNASSIGNED
1D515; ; UNASSIGNED
1D51D; ; UNASSIGNED
1D53A; ; UNASSIGNED
1D53F; ; UNASSIGNED
1D545; ; UNASSIGNED
1D547..1D549; ; UNASSIGNED
1D551; ; UNASSIGNED
1D6A4..1D6A7; ; UNASSIGNED
1D7CA..1D7CD; ; UNASSIGNED
1D800..1FFFD; ; UNASSIGNED
2A6D7..2F7FF; ; UNASSIGNED
2FA1E..2FFFD; ; UNASSIGNED
30000..3FFFD; ; UNASSIGNED
40000..4FFFD; ; UNASSIGNED
50000..5FFFD; ; UNASSIGNED
60000..6FFFD; ; UNASSIGNED
70000..7FFFD; ; UNASSIGNED
80000..8FFFD; ; UNASSIGNED
90000..9FFFD; ; UNASSIGNED
A0000..AFFFD; ; UNASSIGNED
B0000..BFFFD; ; UNASSIGNED
C0000..CFFFD; ; UNASSIGNED
D0000..DFFFD; ; UNASSIGNED
E0000; ; UNASSIGNED
E0002..E001F; ; UNASSIGNED
E0080..EFFFD; ; UNASSIGNED
# Total code points 3653
# This table contains code points from Table B.1 from RFC 3454
00AD; ; MAP
034F; ; MAP
1806; ; MAP
180B; ; MAP
180C; ; MAP
180D; ; MAP
200B; ; MAP
200C; ; MAP
200D; ; MAP
2060; ; MAP
FE00; ; MAP
FE01; ; MAP
FE02; ; MAP
FE03; ; MAP
FE04; ; MAP
FE05; ; MAP
FE06; ; MAP
FE07; ; MAP
FE08; ; MAP
FE09; ; MAP
FE0A; ; MAP
FE0B; ; MAP
FE0C; ; MAP
FE0D; ; MAP
FE0E; ; MAP
FE0F; ; MAP
FEFF; ; MAP
# Total code points 27
# code points from Table C.3
E000..F8FF; ; PROHIBITED
F0000..FFFFD; ; PROHIBITED
100000..10FFFD; ; PROHIBITED
# Total code points 2051
# code points from Table C.4
FDD0..FDEF; ; PROHIBITED
FFFE..FFFF; ; PROHIBITED
1FFFE..1FFFF; ; PROHIBITED
2FFFE..2FFFF; ; PROHIBITED
3FFFE..3FFFF; ; PROHIBITED
4FFFE..4FFFF; ; PROHIBITED
5FFFE..5FFFF; ; PROHIBITED
6FFFE..6FFFF; ; PROHIBITED
7FFFE..7FFFF; ; PROHIBITED
8FFFE..8FFFF; ; PROHIBITED
9FFFE..9FFFF; ; PROHIBITED
AFFFE..AFFFF; ; PROHIBITED
BFFFE..BFFFF; ; PROHIBITED
CFFFE..CFFFF; ; PROHIBITED
DFFFE..DFFFF; ; PROHIBITED
EFFFE..EFFFF; ; PROHIBITED
FFFFE..FFFFF; ; PROHIBITED
10FFFE..10FFFF; ; PROHIBITED
# Total code points 18
# code points from Table C.5
D800..DFFF; ; PROHIBITED
# Total code points 0
# code points from Table C.6
FFF9; ; PROHIBITED
FFFA; ; PROHIBITED
FFFB; ; PROHIBITED
FFFC; ; PROHIBITED
FFFD; ; PROHIBITED
# Total code points 5
# code points from Table C.7
2FF0..2FFB; ; PROHIBITED
# Total code points 1
# code points from Table C.8
0340; ; PROHIBITED
0341; ; PROHIBITED
200E; ; PROHIBITED
200F; ; PROHIBITED
202A; ; PROHIBITED
202B; ; PROHIBITED
202C; ; PROHIBITED
202D; ; PROHIBITED
202E; ; PROHIBITED
206A; ; PROHIBITED
206B; ; PROHIBITED
206C; ; PROHIBITED
206D; ; PROHIBITED
206E; ; PROHIBITED
206F; ; PROHIBITED
# Total code points 15
# code points from Table C.9
E0001; ; PROHIBITED
E0020..E007F; ; PROHIBITED
# Total code points 82
--- NEW FILE: nfs4_mixed_prep_p.txt ---
###################
# Copyright (C) 2003, International Business Machines
# Corporation and others. All Rights Reserved.
###################
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
###################
# This table contains code points from Table A.1 from RFC 3454
0221; ; UNASSIGNED
0234..024F; ; UNASSIGNED
02AE..02AF; ; UNASSIGNED
02EF..02FF; ; UNASSIGNED
0350..035F; ; UNASSIGNED
0370..0373; ; UNASSIGNED
0376..0379; ; UNASSIGNED
037B..037D; ; UNASSIGNED
037F..0383; ; UNASSIGNED
038B; ; UNASSIGNED
038D; ; UNASSIGNED
03A2; ; UNASSIGNED
03CF; ; UNASSIGNED
03F7..03FF; ; UNASSIGNED
0487; ; UNASSIGNED
04CF; ; UNASSIGNED
04F6..04F7; ; UNASSIGNED
04FA..04FF; ; UNASSIGNED
0510..0530; ; UNASSIGNED
0557..0558; ; UNASSIGNED
0560; ; UNASSIGNED
0588; ; UNASSIGNED
058B..0590; ; UNASSIGNED
05A2; ; UNASSIGNED
05BA; ; UNASSIGNED
05C5..05CF; ; UNASSIGNED
05EB..05EF; ; UNASSIGNED
05F5..060B; ; UNASSIGNED
060D..061A; ; UNASSIGNED
061C..061E; ; UNASSIGNED
0620; ; UNASSIGNED
063B..063F; ; UNASSIGNED
0656..065F; ; UNASSIGNED
06EE..06EF; ; UNASSIGNED
06FF; ; UNASSIGNED
070E; ; UNASSIGNED
072D..072F; ; UNASSIGNED
074B..077F; ; UNASSIGNED
07B2..0900; ; UNASSIGNED
0904; ; UNASSIGNED
093A..093B; ; UNASSIGNED
094E..094F; ; UNASSIGNED
0955..0957; ; UNASSIGNED
0971..0980; ; UNASSIGNED
0984; ; UNASSIGNED
098D..098E; ; UNASSIGNED
0991..0992; ; UNASSIGNED
09A9; ; UNASSIGNED
09B1; ; UNASSIGNED
09B3..09B5; ; UNASSIGNED
09BA..09BB; ; UNASSIGNED
09BD; ; UNASSIGNED
09C5..09C6; ; UNASSIGNED
09C9..09CA; ; UNASSIGNED
09CE..09D6; ; UNASSIGNED
09D8..09DB; ; UNASSIGNED
09DE; ; UNASSIGNED
09E4..09E5; ; UNASSIGNED
09FB..0A01; ; UNASSIGNED
0A03..0A04; ; UNASSIGNED
0A0B..0A0E; ; UNASSIGNED
0A11..0A12; ; UNASSIGNED
0A29; ; UNASSIGNED
0A31; ; UNASSIGNED
0A34; ; UNASSIGNED
0A37; ; UNASSIGNED
0A3A..0A3B; ; UNASSIGNED
0A3D; ; UNASSIGNED
0A43..0A46; ; UNASSIGNED
0A49..0A4A; ; UNASSIGNED
0A4E..0A58; ; UNASSIGNED
0A5D; ; UNASSIGNED
0A5F..0A65; ; UNASSIGNED
0A75..0A80; ; UNASSIGNED
0A84; ; UNASSIGNED
0A8C; ; UNASSIGNED
0A8E; ; UNASSIGNED
0A92; ; UNASSIGNED
0AA9; ; UNASSIGNED
0AB1; ; UNASSIGNED
0AB4; ; UNASSIGNED
0ABA..0ABB; ; UNASSIGNED
0AC6; ; UNASSIGNED
0ACA; ; UNASSIGNED
0ACE..0ACF; ; UNASSIGNED
0AD1..0ADF; ; UNASSIGNED
0AE1..0AE5; ; UNASSIGNED
0AF0..0B00; ; UNASSIGNED
0B04; ; UNASSIGNED
0B0D..0B0E; ; UNASSIGNED
0B11..0B12; ; UNASSIGNED
0B29; ; UNASSIGNED
0B31; ; UNASSIGNED
0B34..0B35; ; UNASSIGNED
0B3A..0B3B; ; UNASSIGNED
0B44..0B46; ; UNASSIGNED
0B49..0B4A; ; UNASSIGNED
0B4E..0B55; ; UNASSIGNED
0B58..0B5B; ; UNASSIGNED
0B5E; ; UNASSIGNED
0B62..0B65; ; UNASSIGNED
0B71..0B81; ; UNASSIGNED
0B84; ; UNASSIGNED
0B8B..0B8D; ; UNASSIGNED
0B91; ; UNASSIGNED
0B96..0B98; ; UNASSIGNED
0B9B; ; UNASSIGNED
0B9D; ; UNASSIGNED
0BA0..0BA2; ; UNASSIGNED
0BA5..0BA7; ; UNASSIGNED
0BAB..0BAD; ; UNASSIGNED
0BB6; ; UNASSIGNED
0BBA..0BBD; ; UNASSIGNED
0BC3..0BC5; ; UNASSIGNED
0BC9; ; UNASSIGNED
0BCE..0BD6; ; UNASSIGNED
0BD8..0BE6; ; UNASSIGNED
0BF3..0C00; ; UNASSIGNED
0C04; ; UNASSIGNED
0C0D; ; UNASSIGNED
0C11; ; UNASSIGNED
0C29; ; UNASSIGNED
0C34; ; UNASSIGNED
0C3A..0C3D; ; UNASSIGNED
0C45; ; UNASSIGNED
0C49; ; UNASSIGNED
0C4E..0C54; ; UNASSIGNED
0C57..0C5F; ; UNASSIGNED
0C62..0C65; ; UNASSIGNED
0C70..0C81; ; UNASSIGNED
0C84; ; UNASSIGNED
0C8D; ; UNASSIGNED
0C91; ; UNASSIGNED
0CA9; ; UNASSIGNED
0CB4; ; UNASSIGNED
0CBA..0CBD; ; UNASSIGNED
0CC5; ; UNASSIGNED
0CC9; ; UNASSIGNED
0CCE..0CD4; ; UNASSIGNED
0CD7..0CDD; ; UNASSIGNED
0CDF; ; UNASSIGNED
0CE2..0CE5; ; UNASSIGNED
0CF0..0D01; ; UNASSIGNED
0D04; ; UNASSIGNED
0D0D; ; UNASSIGNED
0D11; ; UNASSIGNED
0D29; ; UNASSIGNED
0D3A..0D3D; ; UNASSIGNED
0D44..0D45; ; UNASSIGNED
0D49; ; UNASSIGNED
0D4E..0D56; ; UNASSIGNED
0D58..0D5F; ; UNASSIGNED
0D62..0D65; ; UNASSIGNED
0D70..0D81; ; UNASSIGNED
0D84; ; UNASSIGNED
0D97..0D99; ; UNASSIGNED
0DB2; ; UNASSIGNED
0DBC; ; UNASSIGNED
0DBE..0DBF; ; UNASSIGNED
0DC7..0DC9; ; UNASSIGNED
0DCB..0DCE; ; UNASSIGNED
0DD5; ; UNASSIGNED
0DD7; ; UNASSIGNED
0DE0..0DF1; ; UNASSIGNED
0DF5..0E00; ; UNASSIGNED
0E3B..0E3E; ; UNASSIGNED
0E5C..0E80; ; UNASSIGNED
0E83; ; UNASSIGNED
0E85..0E86; ; UNASSIGNED
0E89; ; UNASSIGNED
0E8B..0E8C; ; UNASSIGNED
0E8E..0E93; ; UNASSIGNED
0E98; ; UNASSIGNED
0EA0; ; UNASSIGNED
0EA4; ; UNASSIGNED
0EA6; ; UNASSIGNED
0EA8..0EA9; ; UNASSIGNED
0EAC; ; UNASSIGNED
0EBA; ; UNASSIGNED
0EBE..0EBF; ; UNASSIGNED
0EC5; ; UNASSIGNED
0EC7; ; UNASSIGNED
0ECE..0ECF; ; UNASSIGNED
0EDA..0EDB; ; UNASSIGNED
0EDE..0EFF; ; UNASSIGNED
0F48; ; UNASSIGNED
0F6B..0F70; ; UNASSIGNED
0F8C..0F8F; ; UNASSIGNED
0F98; ; UNASSIGNED
0FBD; ; UNASSIGNED
0FCD..0FCE; ; UNASSIGNED
0FD0..0FFF; ; UNASSIGNED
1022; ; UNASSIGNED
1028; ; UNASSIGNED
102B; ; UNASSIGNED
1033..1035; ; UNASSIGNED
103A..103F; ; UNASSIGNED
105A..109F; ; UNASSIGNED
10C6..10CF; ; UNASSIGNED
10F9..10FA; ; UNASSIGNED
10FC..10FF; ; UNASSIGNED
115A..115E; ; UNASSIGNED
11A3..11A7; ; UNASSIGNED
11FA..11FF; ; UNASSIGNED
1207; ; UNASSIGNED
1247; ; UNASSIGNED
1249; ; UNASSIGNED
124E..124F; ; UNASSIGNED
1257; ; UNASSIGNED
1259; ; UNASSIGNED
125E..125F; ; UNASSIGNED
1287; ; UNASSIGNED
1289; ; UNASSIGNED
128E..128F; ; UNASSIGNED
12AF; ; UNASSIGNED
12B1; ; UNASSIGNED
12B6..12B7; ; UNASSIGNED
12BF; ; UNASSIGNED
12C1; ; UNASSIGNED
12C6..12C7; ; UNASSIGNED
12CF; ; UNASSIGNED
12D7; ; UNASSIGNED
12EF; ; UNASSIGNED
130F; ; UNASSIGNED
1311; ; UNASSIGNED
1316..1317; ; UNASSIGNED
131F; ; UNASSIGNED
1347; ; UNASSIGNED
135B..1360; ; UNASSIGNED
137D..139F; ; UNASSIGNED
13F5..1400; ; UNASSIGNED
1677..167F; ; UNASSIGNED
169D..169F; ; UNASSIGNED
16F1..16FF; ; UNASSIGNED
170D; ; UNASSIGNED
1715..171F; ; UNASSIGNED
1737..173F; ; UNASSIGNED
1754..175F; ; UNASSIGNED
176D; ; UNASSIGNED
1771; ; UNASSIGNED
1774..177F; ; UNASSIGNED
17DD..17DF; ; UNASSIGNED
17EA..17FF; ; UNASSIGNED
180F; ; UNASSIGNED
181A..181F; ; UNASSIGNED
1878..187F; ; UNASSIGNED
18AA..1DFF; ; UNASSIGNED
1E9C..1E9F; ; UNASSIGNED
1EFA..1EFF; ; UNASSIGNED
1F16..1F17; ; UNASSIGNED
1F1E..1F1F; ; UNASSIGNED
1F46..1F47; ; UNASSIGNED
1F4E..1F4F; ; UNASSIGNED
1F58; ; UNASSIGNED
1F5A; ; UNASSIGNED
1F5C; ; UNASSIGNED
1F5E; ; UNASSIGNED
1F7E..1F7F; ; UNASSIGNED
1FB5; ; UNASSIGNED
1FC5; ; UNASSIGNED
1FD4..1FD5; ; UNASSIGNED
1FDC; ; UNASSIGNED
1FF0..1FF1; ; UNASSIGNED
1FF5; ; UNASSIGNED
1FFF; ; UNASSIGNED
2053..2056; ; UNASSIGNED
2058..205E; ; UNASSIGNED
2064..2069; ; UNASSIGNED
2072..2073; ; UNASSIGNED
208F..209F; ; UNASSIGNED
20B2..20CF; ; UNASSIGNED
20EB..20FF; ; UNASSIGNED
213B..213C; ; UNASSIGNED
214C..2152; ; UNASSIGNED
2184..218F; ; UNASSIGNED
23CF..23FF; ; UNASSIGNED
2427..243F; ; UNASSIGNED
244B..245F; ; UNASSIGNED
24FF; ; UNASSIGNED
2614..2615; ; UNASSIGNED
2618; ; UNASSIGNED
267E..267F; ; UNASSIGNED
268A..2700; ; UNASSIGNED
2705; ; UNASSIGNED
270A..270B; ; UNASSIGNED
2728; ; UNASSIGNED
274C; ; UNASSIGNED
274E; ; UNASSIGNED
2753..2755; ; UNASSIGNED
2757; ; UNASSIGNED
275F..2760; ; UNASSIGNED
2795..2797; ; UNASSIGNED
27B0; ; UNASSIGNED
27BF..27CF; ; UNASSIGNED
27EC..27EF; ; UNASSIGNED
2B00..2E7F; ; UNASSIGNED
2E9A; ; UNASSIGNED
2EF4..2EFF; ; UNASSIGNED
2FD6..2FEF; ; UNASSIGNED
2FFC..2FFF; ; UNASSIGNED
3040; ; UNASSIGNED
3097..3098; ; UNASSIGNED
3100..3104; ; UNASSIGNED
312D..3130; ; UNASSIGNED
318F; ; UNASSIGNED
31B8..31EF; ; UNASSIGNED
321D..321F; ; UNASSIGNED
3244..3250; ; UNASSIGNED
327C..327E; ; UNASSIGNED
32CC..32CF; ; UNASSIGNED
32FF; ; UNASSIGNED
3377..337A; ; UNASSIGNED
33DE..33DF; ; UNASSIGNED
33FF; ; UNASSIGNED
4DB6..4DFF; ; UNASSIGNED
9FA6..9FFF; ; UNASSIGNED
A48D..A48F; ; UNASSIGNED
A4C7..ABFF; ; UNASSIGNED
D7A4..D7FF; ; UNASSIGNED
FA2E..FA2F; ; UNASSIGNED
FA6B..FAFF; ; UNASSIGNED
FB07..FB12; ; UNASSIGNED
FB18..FB1C; ; UNASSIGNED
FB37; ; UNASSIGNED
FB3D; ; UNASSIGNED
FB3F; ; UNASSIGNED
FB42; ; UNASSIGNED
FB45; ; UNASSIGNED
FBB2..FBD2; ; UNASSIGNED
FD40..FD4F; ; UNASSIGNED
FD90..FD91; ; UNASSIGNED
FDC8..FDCF; ; UNASSIGNED
FDFD..FDFF; ; UNASSIGNED
FE10..FE1F; ; UNASSIGNED
FE24..FE2F; ; UNASSIGNED
FE47..FE48; ; UNASSIGNED
FE53; ; UNASSIGNED
FE67; ; UNASSIGNED
FE6C..FE6F; ; UNASSIGNED
FE75; ; UNASSIGNED
FEFD..FEFE; ; UNASSIGNED
FF00; ; UNASSIGNED
FFBF..FFC1; ; UNASSIGNED
FFC8..FFC9; ; UNASSIGNED
FFD0..FFD1; ; UNASSIGNED
FFD8..FFD9; ; UNASSIGNED
FFDD..FFDF; ; UNASSIGNED
FFE7; ; UNASSIGNED
FFEF..FFF8; ; UNASSIGNED
10000..102FF; ; UNASSIGNED
1031F; ; UNASSIGNED
10324..1032F; ; UNASSIGNED
1034B..103FF; ; UNASSIGNED
10426..10427; ; UNASSIGNED
1044E..1CFFF; ; UNASSIGNED
1D0F6..1D0FF; ; UNASSIGNED
1D127..1D129; ; UNASSIGNED
1D1DE..1D3FF; ; UNASSIGNED
1D455; ; UNASSIGNED
1D49D; ; UNASSIGNED
1D4A0..1D4A1; ; UNASSIGNED
1D4A3..1D4A4; ; UNASSIGNED
1D4A7..1D4A8; ; UNASSIGNED
1D4AD; ; UNASSIGNED
1D4BA; ; UNASSIGNED
1D4BC; ; UNASSIGNED
1D4C1; ; UNASSIGNED
1D4C4; ; UNASSIGNED
1D506; ; UNASSIGNED
1D50B..1D50C; ; UNASSIGNED
1D515; ; UNASSIGNED
1D51D; ; UNASSIGNED
1D53A; ; UNASSIGNED
1D53F; ; UNASSIGNED
1D545; ; UNASSIGNED
1D547..1D549; ; UNASSIGNED
1D551; ; UNASSIGNED
1D6A4..1D6A7; ; UNASSIGNED
1D7CA..1D7CD; ; UNASSIGNED
1D800..1FFFD; ; UNASSIGNED
2A6D7..2F7FF; ; UNASSIGNED
2FA1E..2FFFD; ; UNASSIGNED
30000..3FFFD; ; UNASSIGNED
40000..4FFFD; ; UNASSIGNED
50000..5FFFD; ; UNASSIGNED
60000..6FFFD; ; UNASSIGNED
70000..7FFFD; ; UNASSIGNED
80000..8FFFD; ; UNASSIGNED
90000..9FFFD; ; UNASSIGNED
A0000..AFFFD; ; UNASSIGNED
B0000..BFFFD; ; UNASSIGNED
C0000..CFFFD; ; UNASSIGNED
D0000..DFFFD; ; UNASSIGNED
E0000; ; UNASSIGNED
E0002..E001F; ; UNASSIGNED
E0080..EFFFD; ; UNASSIGNED
# Total code points 3653
# This table contains code points from Table B.1 from RFC 3454
00AD; ; MAP
034F; ; MAP
1806; ; MAP
180B; ; MAP
180C; ; MAP
180D; ; MAP
200B; ; MAP
200C; ; MAP
200D; ; MAP
2060; ; MAP
FE00; ; MAP
FE01; ; MAP
FE02; ; MAP
FE03; ; MAP
FE04; ; MAP
FE05; ; MAP
FE06; ; MAP
FE07; ; MAP
FE08; ; MAP
FE09; ; MAP
FE0A; ; MAP
FE0B; ; MAP
FE0C; ; MAP
FE0D; ; MAP
FE0E; ; MAP
FE0F; ; MAP
FEFF; ; MAP
# Total code points 27
# code points from Table C.1.2
00A0; ; PROHIBITED
1680; ; PROHIBITED
2000; ; PROHIBITED
2001; ; PROHIBITED
2002; ; PROHIBITED
2003; ; PROHIBITED
2004; ; PROHIBITED
2005; ; PROHIBITED
2006; ; PROHIBITED
2007; ; PROHIBITED
2008; ; PROHIBITED
2009; ; PROHIBITED
200A; ; PROHIBITED
200B; ; PROHIBITED
202F; ; PROHIBITED
205F; ; PROHIBITED
3000; ; PROHIBITED
# Total code points 17
# code points from Table C.2.2
0080..009F; ; PROHIBITED
06DD; ; PROHIBITED
070F; ; PROHIBITED
180E; ; PROHIBITED
200C; ; PROHIBITED
200D; ; PROHIBITED
2028; ; PROHIBITED
2029; ; PROHIBITED
2060; ; PROHIBITED
2061; ; PROHIBITED
2062; ; PROHIBITED
2063; ; PROHIBITED
206A..206F; ; PROHIBITED
FEFF; ; PROHIBITED
FFF9..FFFC; ; PROHIBITED
1D173..1D17A; ; PROHIBITED
# Total code points 30
# code points from Table C.3
E000..F8FF; ; PROHIBITED
F0000..FFFFD; ; PROHIBITED
100000..10FFFD; ; PROHIBITED
# Total code points 2051
# code points from Table C.4
FDD0..FDEF; ; PROHIBITED
FFFE..FFFF; ; PROHIBITED
1FFFE..1FFFF; ; PROHIBITED
2FFFE..2FFFF; ; PROHIBITED
3FFFE..3FFFF; ; PROHIBITED
4FFFE..4FFFF; ; PROHIBITED
5FFFE..5FFFF; ; PROHIBITED
6FFFE..6FFFF; ; PROHIBITED
7FFFE..7FFFF; ; PROHIBITED
8FFFE..8FFFF; ; PROHIBITED
9FFFE..9FFFF; ; PROHIBITED
AFFFE..AFFFF; ; PROHIBITED
BFFFE..BFFFF; ; PROHIBITED
CFFFE..CFFFF; ; PROHIBITED
DFFFE..DFFFF; ; PROHIBITED
EFFFE..EFFFF; ; PROHIBITED
FFFFE..FFFFF; ; PROHIBITED
10FFFE..10FFFF; ; PROHIBITED
# Total code points 18
# code points from Table C.5
D800..DFFF; ; PROHIBITED
# Total code points 0
# code points from Table C.6
FFF9; ; PROHIBITED
FFFA; ; PROHIBITED
FFFB; ; PROHIBITED
FFFC; ; PROHIBITED
FFFD; ; PROHIBITED
# Total code points 5
# code points from Table C.7
2FF0..2FFB; ; PROHIBITED
# Total code points 1
# code points from Table C.8
0340; ; PROHIBITED
0341; ; PROHIBITED
200E; ; PROHIBITED
200F; ; PROHIBITED
202A; ; PROHIBITED
202B; ; PROHIBITED
202C; ; PROHIBITED
202D; ; PROHIBITED
202E; ; PROHIBITED
206A; ; PROHIBITED
206B; ; PROHIBITED
206C; ; PROHIBITED
206D; ; PROHIBITED
206E; ; PROHIBITED
206F; ; PROHIBITED
# Total code points 15
# code points from Table C.9
E0001; ; PROHIBITED
E0020..E007F; ; PROHIBITED
# Total code points 82
--- NEW FILE: nfs4_mixed_prep_s.txt ---
###################
# Copyright (C) 2003, International Business Machines
# Corporation and others. All Rights Reserved.
###################
###################
# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT
###################
# This table contains code points from Table A.1 from RFC 3454
0221; ; UNASSIGNED
0234..024F; ; UNASSIGNED
02AE..02AF; ; UNASSIGNED
02EF..02FF; ; UNASSIGNED
0350..035F; ; UNASSIGNED
0370..0373; ; UNASSIGNED
0376..0379; ; UNASSIGNED
037B..037D; ; UNASSIGNED
[...1906 lines suppressed...]
202B; ; PROHIBITED
202C; ; PROHIBITED
202D; ; PROHIBITED
202E; ; PROHIBITED
206A; ; PROHIBITED
206B; ; PROHIBITED
206C; ; PROHIBITED
206D; ; PROHIBITED
206E; ; PROHIBITED
206F; ; PROHIBITED
# Total code points 15
# code points from Table C.9
E0001; ; PROHIBITED
E0020..E007F; ; PROHIBITED
# Total code points 82
--- NEW FILE: ra.txt ---
//*******************************************************************************
//*
//* Copyright (C) 2003, International Business Machines
//* Corporation and others. All Rights Reserved.
//*
//*******************************************************************************
/**
* These are top level comments for the bundle. Tag name: ra
* @translate yes
* @note Comments for tag named ra
*/
ra{
/**
* Top level comments for the string.Tag name: test1
* @translate yes
* @note {0} represents the position of OSNAME and {1} represents the position of job name
* e.g: The OS/400 job named SYSLOG
*/
test1{"The {0} job named {1}"}
/**
* Tag name: test2
* @note This resource is for test2
* @translate yes
*/
test2{"some translatable stuff"}
/**
* Top level comments for TestTable. Tag name: TestTable
* @translate yes
* @note This resource is for TestTable
*/
TestTable{
/**
* Tag name: test3
* @translate yes
* @note This resource is for test3
*/
test3{"jasldjfa"}
/**
* Tag name: test4
* @translate yes
* @note This resource is for test4
*/
test4{"asdfasdf"}
/**
* Tag name: test5
* @translate yes
* @note This resource is for test5
*/
test5:int{1}
/**
* Tag name: test6
* @translate yes
* @note This resource is for test6
*/
test6:bin{"0102"}
}
/**
* Top level comments for Tag name: TestArray
* @translate yes
* @note This resource is for Tag name: Array
*/
TestArray{
/**
* comments for un-named string
* @translate yes
* @note This resource is un-named string
*/
:string{"abcde"},
/**
* comments for un-named int
* @translate yes
* @note This resource is un-named int
*/
:int{1},
/**
* comments for un-named binary
* @translate yes
* @note This resource is for un-named binary
*/
:bin{"12312312"}
}
/**
* comments for TestInclude
* @translate yes
* @note This resource is for TestInclude
*/
TestInclude:include{"translit_rules.txt"}
/**
* comments for TestImport
* @translate yes
* @note This resource is for TestImport
*/
TestImpport:import{"importtest.bin"} // NOTE(review): key is spelled "TestImpport" but its comment says "TestImport" - confirm which spelling the tests expect
/**
* comments for TestIntVector
* @translate yes
* @note This resource is for TestIntVector
*/
TestIntVector{
/**
* comments for element 1
* @translate yes
* @note This resource is for element 1
*/
1,
/**
* comments for element 2
* @translate yes
* @note This resource is for element 2
*/
2,
/**
* comments for element 3
* @translate yes
* @note This resource is for element 3
*/
3
}
}
--- NEW FILE: riwords.txt ---
(This appears to be a binary file; contents omitted.)
--- NEW FILE: test4x.ucm ---
# *******************************************************************************
# * Copyright (C) 2003, International Business Machines
# * Corporation and others. All Rights Reserved.
# *******************************************************************************
#
# test4x.ucm
#
# Test file for MBCS conversion extension with four-byte codepage data.
<code_set_name> "test4x"
<mb_cur_max> 4
<mb_cur_min> 1
<uconv_class> "MBCS"
# test loading an extension table from the testdata package
# (<icu:base> names the base table that this extension-only file builds on)
<icu:base> "test4"
# the only mapping defined here: U+0009 <-> byte 0x09; the |0 flag marks a roundtrip mapping
CHARMAP
<U0009> \x09 |0
END CHARMAP
Index: CollationTest_NON_IGNORABLE_STUB.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/CollationTest_NON_IGNORABLE_STUB.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- CollationTest_NON_IGNORABLE_STUB.txt 10 Sep 2003 02:42:50 -0000 1.1
+++ CollationTest_NON_IGNORABLE_STUB.txt 6 Apr 2004 10:09:51 -0000 1.2
@@ -1,3148 +1,2272 @@
-# Copyright (c) 2001-2003 International Business Machines
-# Corporation and others. All Rights Reserved.
-# This is a stub file
-# for complete test file, go to:
-# http://oss.software.ibm.com/cvs/icu4j/unicodetools/com/ibm/text/data/
-# based on:
-# UCA Version: 3.1.1d6/3.2.0
-# Generated: 2002-07-02,18:49:19 GMT [MD]
-0338 0334;
-0334 0591;
-0334 0592;
[...5389 lines suppressed...]
+4DA8 003F; 4DA8
+4DB2 003F; 4DB2
+2F803 003F; 2F803
+2F860 003F; 2F860
+2F891 0041; 2F891
+2F8E3 003F; 2F8E3
+2F91D 003F; 2F91D
+2F942 003F; 2F942
+2F95D 0041; 2F95D
+2F97C 003F; 2F97C
+2F9B1 003F; 2F9B1
+2F9ED 003F; 2F9ED
+2FA12 003F; 2FA12
+2A6D6 003F; 2A6D6
+F8FC 003F; F8FC
+E0004 003F; E0004
+F00FA 003F; F00FA
+FFFF8 003F; FFFF8
+100004 003F; 100004
+10FF02 003F; 10FF02
Index: CollationTest_SHIFTED_STUB.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/CollationTest_SHIFTED_STUB.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- CollationTest_SHIFTED_STUB.txt 10 Sep 2003 02:42:50 -0000 1.1
+++ CollationTest_SHIFTED_STUB.txt 6 Apr 2004 10:09:51 -0000 1.2
@@ -1,2867 +1,2272 @@
-# Copyright (c) 2001-2003 International Business Machines
-# Corporation and others. All Rights Reserved.
-# This is a stub file
-# for complete test file, go to:
-# http://oss.software.ibm.com/cvs/icu4j/unicodetools/com/ibm/text/data/
-# based on:
-# UCA Version: 3.1.1d6/3.2.0
-# Generated: 2002-07-02,18:49:23 GMT [MD]
-0009 0021;
-000A 0021;
-000B 0021;
[...5108 lines suppressed...]
+4DA8 003F; 4DA8
+4DB2 003F; 4DB2
+2F803 003F; 2F803
+2F860 003F; 2F860
+2F891 0041; 2F891
+2F8E3 003F; 2F8E3
+2F91D 003F; 2F91D
+2F942 003F; 2F942
+2F95D 0041; 2F95D
+2F97C 003F; 2F97C
+2F9B1 003F; 2F9B1
+2F9ED 003F; 2F9ED
+2FA12 003F; 2FA12
+2A6D6 003F; 2A6D6
+F8FC 003F; F8FC
+E0004 003F; E0004
+F00FA 003F; F00FA
+FFFF8 003F; FFFF8
+100004 003F; 100004
+10FF02 003F; 10FF02
Index: DataDrivenCollationTest.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/DataDrivenCollationTest.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- DataDrivenCollationTest.txt 10 Sep 2003 02:42:50 -0000 1.1
+++ DataDrivenCollationTest.txt 6 Apr 2004 10:09:51 -0000 1.2
@@ -18,6 +18,79 @@
}
}
TestData {
+ TestLithuanian {
+ Info {
+ Description { "Lithuanian sort order." }
+ }
+ Settings {
+ {
+ TestLocale { "lt" }
+ }
+ }
+ Cases { "cz<č<d<iz<y<j<sz<š<t<zz<ž" }
+ }
+ TestLatvian {
+ Info {
+ Description { "Latvian sort order." }
+ }
+ Settings {
+ {
+ TestLocale { "lv" }
+ }
+ }
+ Cases { "cz<č<d<gz<ģ<h<iz<y<j<kz<ķ<l<lz<ļ<m<nz<ņ<o<rz<ŗ<s<sz<š<t<zz<ž" }
+ }
+ TestEstonian {
+ Info {
+ Description { "Estonian sort order." }
+ }
+ Settings {
+ {
+ TestLocale { "et" }
+ }
+ }
+ Cases { "sy<š<šy<z<zy<ž<v<w<va<õ<õy<ä<äy<ö<öy<ü<üy<x" }
+ }
+ TestAlbanian {
+ Info {
+ Description { "Albanian sort order." }
+ }
+ Settings {
+ {
+ TestLocale { "sq" }
+ }
+ }
+ Cases { "cz<ç<d<dz<dh<e<ez<ë<f<gz<gj<h<lz<ll<m<nz<nj<o<rz<rr<s<sz<sh<t<tz<th<u<xz<xh<y<zz<zh" }
+ }
+
+ TestSimplifiedChineseOrder {
+ Info {
+ Description { "Sorted file has different order." }
+ }
+ Settings {
+ {
+ TestLocale { "root" }
+ Arguments { "[normalization on]" }
+ }
+ }
+
+ Cases { "\u5F20<\u5F20\u4E00\u8E3F" }
+ }
+
+ TestTibetanNormalizedIterativeCrash {
+ Info {
+ Description { "This pretty much crashes." }
+ }
+ Settings {
+ {
+ TestLocale { "root" }
+ }
+ }
+
+ Cases { "\u0f71\u0f72\u0f80\u0f71\u0f72"
+ "<\u0f80"
+ }
+ }
TestThaiPartialSortKeyProblems {
Info {
Description { "These are examples of strings that caused trouble in partial sort key testing." }
@@ -32,12 +105,12 @@
"<\u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18",
"\u0E01\u0E07\u0E01\u0E32\u0E23"
"<\u0E01\u0E07\u0E42\u0E01\u0E49",
- //"\u0E01\u0E23\u0E19\u0E17\u0E32"
- //"<\u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32",
+ "\u0E01\u0E23\u0E19\u0E17\u0E32"
+ "<\u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32",
"\u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27"
"<\u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27",
- //"\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D"
- //"<\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32"
+ "\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D"
+ "<\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32"
}
}
TestJavaStyleRule {
@@ -186,8 +259,7 @@
"= \u30A1\u0000\u059a\u30FC"
"= \u30A1\u30FC"
}
- }
-
+ }
da_TestPrimary {
Info {
Description { "This test goes through primary strength cases" }
@@ -199,7 +271,7 @@
}
}
Cases {
- "Lvi=Lwi",
+ "Lvi<Lwi",
"L\u00e4vi<L\u00f6wi",
"L\u00fcbeck=Lybeck",
}
@@ -232,19 +304,19 @@
"\u00c7C<"
"D.S.B.<"
"DA<"
+ "\u00d0A<"
"DB<"
+ "\u00d0C<"
"DSB<"
"DSC<"
- "\u00d0A<"
- "\u00d0C<"
"EKSTRA_ARBEJDE<"
"EKSTRABUD0<"
"H\u00d8ST<"
"HAAG<"
"H\u00c5NDBOG<"
"HAANDV\u00c6RKSBANKEN<"
- "karl<"
"Karl<"
+ "karl<"
"'NIELS J\u00d8RGEN'<"
"NIELS-J\u00d8RGEN<"
"NIELSEN<"
@@ -266,8 +338,8 @@
"STORMLY<"
"THORVALD<"
"THORVARDUR<"
- "THYGESEN<"
"\u00feORVAR\u00d0UR<"
+ "THYGESEN<"
"'VESTERG\u00c5RD, A'<"
"'VESTERGAARD, A'<"
"'VESTERG\u00c5RD, B'<"
@@ -315,11 +387,11 @@
"subtle<"
"symbol<"
"s\u00e4mtlich<"
- "waffle<"
"verkehrt<"
- "wood<"
"vox<"
"v\u00e4ga<"
+ "waffle<"
+ "wood<"
"yen<"
"yuan<"
"yucca<"
@@ -332,6 +404,65 @@
"zysk0<"
"\u00e4ndere"
}
+ }
+ hi_TestNewRules {
+ Info {
+ Description { "This test goes through new rules and tests against old rules" }
+ }
+ Settings {
+ {
+ TestLocale { "hi" }
+ }
+ }
+ Cases {
+ "ॐ<।<॥<॰<०<१<२<३"
+ "<४<५<६<७<८<९<अ<आ"
+ "<इ<ई<उ<ऊ<ऋ<ॠ<ऌ<ॡ"
+ "<ऍ<ऎ<ए<ऐ<ऑ<ऒ<ओ<औ"
+ "<क<क़=क़<कँ<कं<कः<क॑<क॒"
+ "<क॓<क॔<कऽ<क्<का<कि<की<कु"
+ "<कू<कृ<कॄ<कॢ<कॣ<कॅ<कॆ<के"
+ "<कै<कॉ<कॊ<को<कौ<ख<ख़ =ख़<खँ<खं<खः"
+ "<ख॑<ख॒<ख॓<ख॔<खऽ<ख्<खा<खि"
+ "<खी<खु<खू<खृ<खॄ<खॢ<खॣ<खॅ"
+ "<खॆ<खे<खै<खॉ<खॊ<खो<खौ<ग"
+ "<ग़=ग़<गँ<गं<गः<ग॑<ग॒<ग॓<ग॔"
+ "<गऽ<ग्<गा<गि<गी<गु<गू<गृ"
+ "<गॄ<गॢ<गॣ<गॅ<गॆ<गे<गै<गॉ"
+ "<गॊ<गो<गौ<घ<ङ<च<छ<ज<ज़ =ज़<जँ<जं<जः"
+ "<ज॑<ज॒<ज॓<ज॔<जऽ<ज्<जा<जि"
+ "<जी<जु<जू<जृ<जॄ<जॢ<जॣ<जॅ"
+ "<जॆ<जे<जै<जॉ<जॊ<जो<जौ<झ"
+ "<ञ<ट<ठ<ड<ड़=ड़<डँ<डं<डः<ड॑<ड॒<ड॓<ड॔"
+ "<डऽ<ड्<डा<डि<डी<डु<डू<डृ"
+ "<डॄ<डॢ<डॣ<डॅ<डॆ<डे<डै<डॉ"
+ "<डॊ<डो<डौ<ढ<ढ़=ढ़<ढँ<ढं<ढः"
+ "<ढ॑<ढ॒<ढ॓<ढ॔<ढऽ<ढ्<ढा<ढि"
+ "<ढी<ढु<ढू<ढृ<ढॄ<ढॢ<ढॣ<ढॅ"
+ "<ढॆ<ढे<ढै<ढॉ<ढॊ<ढो<ढौ<ण"
+ "<त<थ<द<ध<न<ऩ =ऩ< नँ<नं< नः"
+ "<न॑<न॒<न॓<न॔<नऽ<न्<ना<नि"
+ "<नी<नु<नू<नृ<नॄ<नॢ<नॣ<नॅ"
+ "<नॆ<ने<नै<नॉ<नॊ<नो<नौ"
+ "<प<फ<फ़=फ़<फँ<फं<फः<फ॑<फ॒"
+ "<फ॓<फ॔<फऽ<फ्<फा<फि<फी<फु<फू<फृ"
+ "<फॄ<फॢ<फॣ<फॅ<फॆ<फे<फै<फॉ"
+ "<फॊ<फो<फौ<ब<भ<म<य<य़=य़ "
+ "<यँ<यं<यः<य॑<य॒<य॓<य॔"
+ "<यऽ<य्<या<यि<यी<यु<यू<यृ"
+ "<यॄ<यॢ<यॣ<यॅ<यॆ<ये<यै<यॉ"
+ "<यॊ<यो<यौ<र<ऱ=ऱ<रँ<रं<रः"
+ "<र॑<र॒<र॓<र॔<रऽ<र्<रा<रि"
+ "<री<रु<रू<रृ<रॄ<रॢ<रॣ<रॅ"
+ "<रॆ<रे<रै<रॉ<रॊ<रो<रौ"
+ "<ल<ळ<ऴ=ऴ<ळँ<ळं<ळः<ळ॑<ळ॒"
+ "<ळ॓<ळ॔<ळऽ<ळ्<ळा<ळि<ळी<ळु"
+ "<ळू<ळृ<ळॄ<ळॢ<ळॣ<ळॅ<ळॆ<ळे"
+ "<ळै<ळॉ<ळॊ<ळो<ळौ<व<श<ष<स<ह"
+ "<़<ँ<ं<ः<॑<॒<॓<॔<ऽ<्<ा<ि<ी"
+ "<ु<ू<ृ<ॄ<ॢ<ॣ<ॅ<ॆ"
+ "<े<ै<ॉ<ॊ<ो<ौ"
+ }
}
}
}
Index: idna_rules.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/idna_rules.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- idna_rules.txt 10 Sep 2003 02:42:50 -0000 1.1
+++ idna_rules.txt 6 Apr 2004 10:09:52 -0000 1.2
@@ -71,7 +71,7 @@
"]"}
ProhibitedSet{"[ \\u0020 \\u00A0 \\u1680 \\u2000 \\u2001 \\u2002 \\u2003 \\u2004 \\u2005 \\u2006 \\u2007 "
- "\\u2008 \\u2009 \\u200A \\u200B \\u202F \\u205F \\u3000 \\u0000-\\u001F \\u007F "
+ "\\u2008 \\u2009 \\u200A \\u200B \\u202F \\u205F \\u3000 "
"\\u0080-\\u009F \\u06DD \\u070F \\u180E \\u200C \\u200D \\u2028 \\u2029 \\u2060 "
"\\u2061 \\u2062 \\u2063 \\u206A-\\u206F \\uFEFF \\uFFF9-\\uFFFC \\U0001D173-\\U0001D17A "
"\\uE000-\\uF8FF \\U000F0000-\\U000FFFFD \\U00100000-\\U0010FFFD \\uFDD0-\\uFDEF "
Index: rbbitst.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/rbbitst.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- rbbitst.txt 10 Sep 2003 02:42:50 -0000 1.1
+++ rbbitst.txt 6 Apr 2004 10:09:52 -0000 1.2
@@ -22,6 +22,9 @@
#
+# Temp debugging tests
+<line>
+<data><>\U0001d7f9\u003b\u2034<></data>
########################################################################################
#
@@ -176,6 +179,11 @@
<data>•\u0917\u092a\u00ad\u0936\u092a<200>!•\u092f\u0939<200> •\u0939\u093f\u0928\u094d\u200d\u0926\u0940<200> •\u0939\u0948<200> •\u0905\u093e\u092a<200> •\u0938\u093f\u0916\u094b\u0917\u0947<200>?•\n•:•\u092a\u094d\u0930\u093e\u092f\u0903<200>
•\u0935\u0930\u094d\u0937\u093e<200>\r\n•\u092a\u094d\u0930\u0915\u093e\u0936<200>,•\u0924\u0941\u092e\u093e\u0930\u094b<200> •\u092e\u093f\u0924\u094d\u0930<200> •\u0915\u093e<200> •\u092a\u0924\u094d\u0930<200> •\u092a\u095d\u094b<200> •\u0938\u094d\u0924\u094d\u0930\u093f<200>.• •\u0968\u0966.\u0969\u096f<100> •\u0967\u0966\u0966.\u0966\u0966<100>\u20a8•\u0967,\u0967\u0966\u0966.\u0966\u0966<100> •\u0905\u092e\u091c<200>\n•\u0938\u094d\u200d\u0935\u0924\u0902\u0924\u094d\u0930<200>\r•</data>
+#
+# Failures from monkey tests
+#
+<data>•\u8527<400>\u02ba<200>\u0027\u0d42•\u00b7•\u09ea<100></data>
+
########################################################################################
#
#
@@ -192,16 +200,16 @@
<sent>
-<data>•This\n•</data>
+<data>•This\n<100></data>
<data>•Hello! •how are you? •I'am fine. •Thankyou. •How are you \
-doing? •This\n• costs $20,00,000. •</data>
+doing? •This\n<100> costs $20,00,000. •</data>
# Sentence ending in a quote.
<data>•"Sentence ending with a quote." •Bye.•</data>
# Sentence, and test data, ending without a period or other terminator.
-<data>•Here is a random sentence, no ending period•</data>
+<data>•Here is a random sentence, no ending period<100></data>
<data>• (This is it). •Testing the sentence iterator. •\
@@ -221,7 +229,7 @@
Yet another popular saying is: \
'I'm fine thanks.' •\
What is the proper use of the abbreviation pp.•? •Yes, I am definatelly 12" tall!•!\
-•Now\r•is\n•the\r\n•time\n•\r•for\r•\r•</data>
+•Now\r<100>is\n<100>the\r\n<100>time\n<100>\r<100>for\r<100>\r<100></data>
<data>•No breaks when . is surrounded by UPPER.Case letters. •</data>
<data>•No breaks when . is followed by Numeric .4 a.4 C.4 3.1 .•</data>
@@ -242,7 +250,7 @@
#
# Don't break sentences at boundary between CJK and digits
#
-<data>•\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e48888\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u3002•Bye, now•</data>"
+<data>•\u5487\u67ff\ue591\u5017\u61b3\u60a1\u9510\u8165\u9de8\u97e48888\u821c\u8165\u7fc8\u51ce\u306d\ue30b\u2494\u56d8\u4ec0\u60b1\u8560\u51ba\u611d\u57b6\u2510\u5d46\u97e5\u7751\u3002•Bye, now<100></data>"
#
# Breaks around '(' following a sentence TERM. (Rule 9)
@@ -253,16 +261,15 @@
<data>•How do you do? •(fine). •</data>
#
-<data>•Hello.123•</data> # Rule 6
-<data>•Hello?•123•</data>
+<data>•Hello.123<100></data> # Rule 6
+<data>•Hello?•123<100></data>
-<data>•HELLO.Bye•</data> # Rule 7
-<data>•HELLO?•Bye•</data>
+<data>•HELLO.Bye<100></data> # Rule 7
+<data>•HELLO?•Bye<100></data>
-<data>•Hello.goodbye•</data> #Rule 8
-<data>•Hello. •Goodbye•</data>
-<data>•Hello. goodbye•</data>
-<data>•Hello.)@#$%^&*()"" goodbye•</data>
+<data>•Hello.goodbye<100></data> #Rule 8
+<data>•Hello. •Goodbye<100></data>
+<data>•Hello. goodbye<100></data>
@@ -295,9 +302,9 @@
# make sure there is sentence break after ?,danda(hindi phrase separator),
# fullstop followed by space. (VERY old test)
#
-<data>•\u0928\u092e\u0938\u094d\u200d\u0924\u0947 \u0930\u092e\u0947\u0936\u0905\u093e\u092a\u0915\u0948\u0938\u0947 \u0939\u0948?•\u092e\u0948 \u0905\u091a\u094d\u200d \u091b\u093e \u0939\u0942\u0901\u0964 •\u0905\u093e\u092a\r\n•\
+<data>•\u0928\u092e\u0938\u094d\u200d\u0924\u0947 \u0930\u092e\u0947\u0936\u0905\u093e\u092a\u0915\u0948\u0938\u0947 \u0939\u0948?•\u092e\u0948 \u0905\u091a\u094d\u200d \u091b\u093e \u0939\u0942\u0901\u0964 •\u0905\u093e\u092a\r\n<100>\
\u0915\u0948\u0938\u0947 \u0939\u0948?•\u0935\u0939 \u0915\u094d\u200d\u092f\u093e\n\
-•\u0939\u0948?•\u092f\u0939 \u0905\u093e\u092e \u0939\u0948. •\u092f\u0939 means "this". •"\u092a\u095d\u093e\u0908" meaning "education" or "studies". •\u0905\u093e\u091c(\u0938\u094d\u200d\u0935\u0924\u0902\u0924\u094d\u0930 \u0926\u093f\u0935\u093e\u0938) \u0939\u0948\u0964 •Let's end here. •</data>
+<100>\u0939\u0948?•\u092f\u0939 \u0905\u093e\u092e \u0939\u0948. •\u092f\u0939 means "this". •"\u092a\u095d\u093e\u0908" meaning "education" or "studies". •\u0905\u093e\u091c(\u0938\u094d\u200d\u0935\u0924\u0902\u0924\u094d\u0930 \u0926\u093f\u0935\u093e\u0938) \u0939\u0948\u0964 •Let's end here. •</data>
# Regression test for bug #1984, Sentence break in Arabic text.
@@ -358,7 +365,7 @@
<data>• •\u0041•</data>
<data>• •\u0009•</data>
<data>• •\u00B4•</data>
-<data>• \u000C•</data> # LB3C × BK
+<data>• \u000C<100></data> # LB3C × BK
<data>• •\u2014•</data>
<data>• •\uFFFC•</data>
<data>• \u0029•</data> # LB 8 × CL
@@ -369,7 +376,7 @@
<data>• •\u4E00•</data>
<data>• •\u2024•</data>
<data>• \u002C•</data> # LB 8 × IS
-<data>• \u000A•</data> # LB3C × ( BK | CR | LF | NL )
+<data>• \u000A<100></data> # LB3C × ( BK | CR | LF | NL )
<data>• •\u0E5A•</data>
<data>• •\u0032•</data>
<data>• •\u0028•</data>
@@ -387,34 +394,34 @@
# 3a Always break after hard line breaks.
# 3c Never break before hard line breaks.
-<data>• •\u00A1\u2028•\u00A1•</data>
-<data>• •\u0041\u2028•\u0041•</data>
-<data>• •\u0009\u2028•\u0009•</data>
-<data>• •\u00B4\u2028•\u00B4•</data>
-<data>• \u000C•\u2028•\u000C•</data>
-<data>• •\u2014\u2028•\u2014•</data>
-<data>• •\uFFFC\u2028•\uFFFC•</data>
-<data>• \u0029\u2028•\u0029•</data>
-#<data>• \u0301\u2028•\u0301•</data> # TODO: fix.
-<data>• \u0021\u2028•\u0021•</data>
-#<data>• \u00A0\u2028•\u00A0•</data> # TODO: fix
-<data>• •\u002D\u2028•\u002D•</data>
-<data>• •\u4E00\u2028•\u4E00•</data>
-<data>• •\u2024\u2028•\u2024•</data>
-<data>• \u002C\u2028•\u002C•</data>
-<data>• \u000A•\u2028•\u000A•</data>
-<data>• •\u0E5A\u2028•\u0E5A•</data>
-<data>• •\u0032\u2028•\u0032•</data>
-<data>• •\u0028\u2028•\u0028•</data>
-<data>• •\u0025\u2028•\u0025•</data>
-<data>• •\u0024\u2028•\u0024•</data>
-<data>• •\u0022\u2028•\u0022•</data>
-<data>• •\u0E01\u2028•\u0E01•</data>
-<data>• •\uDB7F\u2028•\uDB7F•</data>
-<data>• \u0020\u2028•\u0020•</data>
-<data>• \u002F\u2028•\u002F•</data>
-<data>• •\uF8FF\u2028•\uF8FF•</data>
-<data>• \u200B\u2028•\u200B•</data>
+<data>• •\u00A1\u2028<100>\u00A1•</data>
+<data>• •\u0041\u2028<100>\u0041•</data>
+<data>• •\u0009\u2028<100>\u0009•</data>
+<data>• •\u00B4\u2028<100>\u00B4•</data>
+<data>• \u000C<100>\u2028<100>\u000C<100></data>
+<data>• •\u2014\u2028<100>\u2014•</data>
+<data>• •\uFFFC\u2028<100>\uFFFC•</data>
+<data>• \u0029\u2028<100>\u0029•</data>
+#<data>• \u0301\u2028<100>\u0301•</data> # TODO: fix.
+<data>• \u0021\u2028<100>\u0021•</data>
+#<data>• \u00A0\u2028<100>\u00A0•</data> # TODO: fix
+<data>• •\u002D\u2028<100>\u002D•</data>
+<data>• •\u4E00\u2028<100>\u4E00•</data>
+<data>• •\u2024\u2028<100>\u2024•</data>
+<data>• \u002C\u2028<100>\u002C•</data>
+<data>• \u000A<100>\u2028<100>\u000A<100></data>
+<data>• •\u0E5A\u2028<100>\u0E5A•</data>
+<data>• •\u0032\u2028<100>\u0032•</data>
+<data>• •\u0028\u2028<100>\u0028•</data>
+<data>• •\u0025\u2028<100>\u0025•</data>
+<data>• •\u0024\u2028<100>\u0024•</data>
+<data>• •\u0022\u2028<100>\u0022•</data>
+<data>• •\u0E01\u2028<100>\u0E01•</data>
+<data>• •\uDB7F\u2028<100>\uDB7F•</data>
+<data>• \u0020\u2028<100>\u0020•</data>
+<data>• \u002F\u2028<100>\u002F•</data>
+<data>• •\uF8FF\u2028<100>\uF8FF•</data>
+<data>• \u200B\u2028<100>\u200B•</data>
#
@@ -424,11 +431,11 @@
<line>
<data>•Multi-•Level •example •of •a •semi-•idiotic •non-•sensical •(non-•important) •sentence.
-•Hi •Hello •How\n•are\r•you\u2028•fine.\t•good. •Now\r•is\n•the\r\n•time\n•\r•for\r•\r•all•</data>
+<100>Hi •Hello •How\n<100>are\r<100>you\u2028<100>fine.\t•good. •Now\r<100>is\n<100>the\r\n<100>time\n<100>\r<100>for\r<100>\r<100>all•</data>
<line>
-<data>•Hello! •how\r\n• •(are)\r• •you? •I'am •fine- •Thankyou. •foo\u00a0bar
-•How, •are, •you? •This, •costs •$20,00,000.•</data>
+<data>•Hello! •how\r\n<100> •(are)\r<100> •you? •I'am •fine- •Thankyou. •foo\u00a0bar
+<100>How, •are, •you? •This, •costs •$20,00,000.•</data>
# test for bug #4068133
#
@@ -438,10 +445,10 @@
<data>•foo\u00a0bar•</data>
# to test for bug #4097920
-<data>•dog,•cat,•mouse •(one)•(two)\n•</data>
+<data>•dog,•cat,•mouse •(one)•(two)\n<100></data>
# to test for bug #4035266
-<data>•The •balance •is •$-23,456.78, •not •-•$32,456.78!\n•</data>
+<data>•The •balance •is •$-23,456.78, •not •-•$32,456.78!\n<100></data>
# to test for bug #4098467
@@ -462,7 +469,7 @@
# Surrogate line break tests.
#
-<data>•\u4e01•\ud840\udc01•\u4e02•abc•\ue000•\udb80\udc01•</data>
+<data>•\u4e01•\ud840\udc01•\u4e02•abc •\ue000 •\udb80\udc01•</data>
# Regression for bug 836
<data>•AAA•(AAA •</data>
Index: regextst.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/regextst.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- regextst.txt 10 Sep 2003 02:42:50 -0000 1.1
+++ regextst.txt 6 Apr 2004 10:09:52 -0000 1.2
@@ -18,6 +18,8 @@
# x free spacing and comments
# s dot-matches-all mode
# m multi-line mode. $ and ^ match at embedded new-lines
+# v If icu configured without break iteration, this
+# regex test pattern should not compile.
# d dump the compiled pattern
# t trace operation of match engine.
# White space must be present between the flags and the match string.
@@ -66,6 +68,7 @@
".*\Ahello" "stuff\nhello" # don't match after embedded new-line.
# \b \B
+#
".*?\b(.).*" "<0> $%^&*( <1>h</1>ello123%^&*()gxx</0>"
"\ba\b" "-<0>a</0>"
"\by\b" "xy"
@@ -78,6 +81,20 @@
"(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?.*" "<0> \u0301 \u0301<1>A</1>\u0302BC\u0303\u0304<2> </2>\u0305 \u0306<3>X</3>\u0307Y\u0308</0>"
+
+#
+# Unicode word boundary mode
+#
+"(?w).*?\b" v "<0></0>hello, world"
+"(?w).*?(\b.+?\b).*" v "<0><1> </1> 123.45 </0>"
+"(?w).*?(\b\d.*?\b).*" v "<0> <1>123.45</1> </0>"
+".*?(\b.+?\b).*" "<0> <1>123</1>.45 </0>"
+"(?w:.*?(\b\d.*?\b).*)" v "<0> <1>123.45</1> </0>"
+"(?w:.*?(\b.+?\b).*)" v "<0><1>don't</1> </0>"
+"(?w:.+?(\b\S.+?\b).*)" v "<0> <1>don't</1> </0>"
+"(?w:(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?).*)" v "<0><1>.</1><2> </2><3>,</3><4>:</4><5>$</5><6>37,000.50</6><7> </7> </0>"
+
+
# . does not match new-lines
"." "\u000a\u000d\u0085\u000c\u2028\u2029<0>X</0>\u000aY"
"A." "A\u000a "# no match
@@ -350,6 +367,27 @@
"(\ud800)(\udc00)" "\U00010000"
+#
+# Bug 3225
+
+"1|9" "<0>1</0>"
+"1|9" "<0>9</0>"
+"1*|9" "<0>1</0>"
+"1*|9" "<0></0>9"
+
+"(?:a|ac)d" "<0>acd</0>"
+"a|ac" "<0>a</0>c"
+
+#
+# Bug 3320
+#
+"(a([^ ]+)){0,} (c)" "<0><1>a<2>b</2></1> <3>c</3></0> "
+"(a([^ ]+))* (c)" "<0><1>a<2>b</2></1> <3>c</3></0> "
+
+#
+# Bug 3436
+#
+"(.*?) *$" "<0><1>test</1> </0>"
#
# Random debugging, Temporary
Index: te.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/te.txt,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- te.txt 10 Sep 2003 02:42:50 -0000 1.3
+++ te.txt 6 Apr 2004 10:09:53 -0000 1.4
@@ -1,6 +1,6 @@
//*******************************************************************************
//*
-//* Copyright (C) 1998-2000, International Business Machines
+//* Copyright (C) 1998-2003, International Business Machines
//* Corporation and others. All Rights Reserved.
//*
//*******************************************************************************
@@ -89,12 +89,14 @@
tag0 { TE0 }
}
- CollationElements
- { Version { "1.0" }
+ collations {
+ standard {
+ Version { "1.0" }
Override { "FALSE" }
Sequence {"& A < a\u0308 , A\u0308 & C < c\u0327 , C\u0327 & G < g"
"\u0306 , G\u0306 & H < \u0131 , I , i , \u0130 < \u0132 , \u0133 & O < o\u0308 "
", O\u0308 & S < s\u0327 , S\u0327 & U < u\u0308 , U\u0308 " }
+ }
}
}
Index: test1.ucm
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/test1.ucm,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- test1.ucm 10 Sep 2003 02:42:50 -0000 1.3
+++ test1.ucm 6 Apr 2004 10:09:53 -0000 1.4
@@ -1,5 +1,5 @@
# *******************************************************************************
-# * Copyright (C) 2001, International Business Machines
+# * Copyright (C) 2001-2003, International Business Machines
# * Corporation and others. All Rights Reserved.
# *******************************************************************************
#
@@ -11,13 +11,14 @@
<mb_cur_max> 1
<mb_cur_min> 1
<uconv_class> "MBCS"
-<subchar> \xff
-<icu:state> 0, 5-9, ff
+<subchar> \xff
+<icu:state> 0, 5-9, ff
CHARMAP
# fromUnicode result is zero byte from other than U+0000
<U20ac> \x00 |0
+<U20ad> \x00 |1
# nothing special
<U0005> \x05 |0
Index: test3.ucm
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/test3.ucm,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- test3.ucm 10 Sep 2003 02:42:50 -0000 1.3
+++ test3.ucm 6 Apr 2004 10:09:53 -0000 1.4
@@ -1,20 +1,21 @@
# *******************************************************************************
-# * Copyright (C) 2001, International Business Machines
+# * Copyright (C) 2001-2003, International Business Machines
# * Corporation and others. All Rights Reserved.
# *******************************************************************************
#
# test3.ucm
#
# Test file for MBCS conversion with three-byte codepage data.
+# Also contains extension mappings (m:n).
<code_set_name> "test3"
<mb_cur_max> 3
<mb_cur_min> 1
<uconv_class> "MBCS"
-<subchar> \xff
-<icu:state> 0, 1:1, 5-9, ff
-<icu:state> 2:2
-<icu:state> a-f.p
+<subchar> \xff
+<icu:state> 0, 1:1, 5-9, ff
+<icu:state> 2:2
+<icu:state> a-f.p
CHARMAP
@@ -24,6 +25,11 @@
# nothing special
<U0005> \x05 |0
+# extensions
+<U00c0> \x05+\x01\x02\x0d |0
+<U00c0> \x05+\x01\x02\x0e |3
+<U00c0> \x05+\xff |3
+
# toUnicode result is fallback direct
<U0006> \x06 |3
@@ -31,7 +37,17 @@
<U101234> \x07 |0
<Ufebcd> \x08 |3
+# extensions
+<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0
+<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0
+<U101234>+<U60006> \x07+\x00+\x01\x02\x0f+\x06 |0
+<U101234>+<U70007> \x07+\x00+\x01\x02\x0f |1
+
#unassigned \x09
+
+# extensions where the first code point is unassigned, for replay testing
+#<U00c4><U0300> \x09+\x09 |0
+<U00c4><U00c4><U101234><U0005> \x05+\x01\x02\x0c |0
# toUnicode result is surrogate pair: test real pair, single unit, unassigned
<U23456> \x01\x02\x0a |0
Index: test4.ucm
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/test4.ucm,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- test4.ucm 10 Sep 2003 02:42:50 -0000 1.3
+++ test4.ucm 6 Apr 2004 10:09:53 -0000 1.4
@@ -1,27 +1,35 @@
# *******************************************************************************
-# * Copyright (C) 2001, International Business Machines
+# * Copyright (C) 2001-2003, International Business Machines
# * Corporation and others. All Rights Reserved.
# *******************************************************************************
#
# test4.ucm
#
-# Test file for MBCS conversion with three-byte codepage data.
+# Test file for MBCS conversion with four-byte codepage data.
<code_set_name> "test4"
<mb_cur_max> 4
<mb_cur_min> 1
<uconv_class> "MBCS"
-<subchar> \xff
-<icu:state> 0, 1:1, 5-9, ff
-<icu:state> 2:2
-<icu:state> 3:3
-<icu:state> a-f.p
+
+# both subchars are single-byters, which does not make sense
+# but works - adding subchar1 for tests but don't want to
+# change old tests for a new subchar -- markus 20031028
+<subchar> \xff
+<subchar1> \xe1
+<icu:state> 0, 1:1, 5-9, e1, ff
+<icu:state> 2:2
+<icu:state> 3:3
+<icu:state> a-f.p, ff
CHARMAP
# fromUnicode result is zero byte from other than U+0000
<U20ac> \x00 |0
+# fallback from non-zero to zero possible with extension table
+<U20ad> \x00 |1
+
# nothing special
<U0005> \x05 |0
@@ -41,5 +49,16 @@
<U34567> \x01\x02\x03\x0d |3
<U000e> \x01\x02\x03\x0e |3
#unassigned \x01\x02\x03\x0f
+
+# <subchar1> non-mapping
+<U50005> \xe1 |2
+# add a mapping that turns the above's Unicode side into a prefix
+<U50005><U60006> \x06 |1
+
+# many bytes, and bytes per UChar
+<U30ab><U309a> \x01\x02\x03\x0a\x01\x02\x03\x0b\x01\x02\x03\x0c\x01\x02\x03\x0d\x01\x02\x03\x0e\x01\x02\x03\x0f\x01\x02\x03\x0a\x05\x06\x07 |0
+
+# many UChars, and UChars per byte
+<U304b><U309a><U304d><U309a><U304f><U309a><U3051><U309a><U3053><U309a><U30ab><U309a><U30ad><U309a><U30af><U309a><U30b1><U309a><U0300> \x08\x09 |0
END CHARMAP
Index: testaliases.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/testaliases.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- testaliases.txt 10 Sep 2003 02:42:50 -0000 1.1
+++ testaliases.txt 6 Apr 2004 10:09:53 -0000 1.2
@@ -23,10 +23,10 @@
// alias that uses another alias and references parts of the end structure
referencingalias:alias { "testaliases/anotheralias/Sequence" } // Referencing through another alias
- anotheralias:alias { "/ICUDATA/uk/CollationElements" }
+ anotheralias:alias { "/ICUDATA/uk/collations/standard" }
// aliasing using position
- CollationElements:alias { "/ICUDATA/uk" } // Referencing corresponding resource in another bundle
+ collations:alias { "/ICUDATA/uk" } // Referencing corresponding resource in another bundle
// aliasing arrays
zoneTests {
Index: testdata.mk
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/testdata.mk,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- testdata.mk 10 Sep 2003 02:42:50 -0000 1.4
+++ testdata.mk 6 Apr 2004 10:09:53 -0000 1.5
@@ -14,16 +14,31 @@
ALL : "$(TESTDATAOUT)\testdata.dat"
@echo Test data is built.
-"$(TESTDATAOUT)\testdata.dat" : "$(TESTDATABLD)\casing.res" "$(TESTDATABLD)\mc.res" "$(TESTDATABLD)\root.res" "$(TESTDATABLD)\te.res" "$(TESTDATABLD)\te_IN.res" "$(TESTDATABLD)\testaliases.res" "$(TESTDATABLD)\testtypes.res" "$(TESTDATABLD)\testempty.res" "$(TESTDATABLD)\$(TESTDT)iscii.res" "$(TESTDATABLD)\$(TESTDT)idna_rules.res" "$(TESTDATABLD)\DataDrivenCollationTest.res" $(TESTDATABLD)\$(TESTDT)test.icu "$(TESTDATABLD)\$(TESTDT)test1.cnv" "$(TESTDATABLD)\$(TESTDT)test3.cnv" "$(TESTDATABLD)\$(TESTDT)test4.cnv" "$(TESTDATABLD)\$(TESTDT)ibm9027.cnv"
+# icu26_testtypes.res is there for cintltst/udatatst.c/TestSwapData()
+# I generated it with an ICU 2.6.1 build on Windows after removing
+# testincludeUTF (which made it large, unnecessarily for this test)
+# and CollationElements (which will not work with a newer swapper)
+# markus 2003nov19
+
+# icu26e_testtypes.res is the same, but icuswapped to big-endian EBCDIC
+# markus 2003nov21
+
+"$(TESTDATAOUT)\testdata.dat" : "$(TESTDATABLD)\casing.res" "$(TESTDATABLD)\conversion.res" "$(TESTDATABLD)\mc.res" "$(TESTDATABLD)\root.res" "$(TESTDATABLD)\te.res" "$(TESTDATABLD)\te_IN.res" "$(TESTDATABLD)\testaliases.res" "$(TESTDATABLD)\testtypes.res" "$(TESTDATABLD)\testempty.res" "$(TESTDATABLD)\$(TESTDT)iscii.res" "$(TESTDATABLD)\$(TESTDT)idna_rules.res" "$(TESTDATABLD)\DataDrivenCollationTest.res" "$(TESTDATABLD)\$(TESTDT)test.icu" "$(TESTDATABLD)\$(TESTDT)testtable32.res" "$(TESTDATABLD)\$(TESTDT)test1.cnv" "$(TESTDATABLD)\$(TESTDT)test3.cnv" "$(TESTDATABLD)\$(TESTDT)test4.cnv" "$(TESTDATABLD)\$(TESTDT)test4x.cnv" "$(TESTDATABLD)\$(TESTDT)ibm9027.cnv" "$(TESTDATABLD)\$(TESTDT)nfscsi.spp" "$(TESTDATABLD)\$(TESTDT)nfscss.spp" "$(TESTDATABLD)\$(TESTDT)nfscis.spp" "$(TESTDATABLD)\$(TESTDT)nfsmxs.spp" "$(TESTDATABLD)\$(TESTDT)nfsmxp.spp"
@echo Building test data
@copy "$(TESTDATABLD)\$(TESTDT)te.res" "$(TESTDATAOUT)\$(TESTDT)nam.typ"
- @"$(ICUTOOLS)\pkgdata\$(CFG)\pkgdata" -f -v -m common -c -p"$(TESTPKG)" -O "$(PKGOPT)" -d "$(TESTDATAOUT)" -T "$(TESTDATABLD)" -s "$(TESTDATABLD)" <<
+ @copy "$(TESTDATA)\$(TESTDT)icu26_testtypes.res" "$(TESTDATABLD)"
+ @copy "$(TESTDATA)\$(TESTDT)icu26e_testtypes.res" "$(TESTDATABLD)"
+ @"$(ICUP)\bin\pkgdata" -f -v -m common -c -p"$(TESTPKG)" -d "$(TESTDATAOUT)" -T "$(TESTDATABLD)" -s "$(TESTDATABLD)" <<
$(TESTDT)casing.res
+$(TESTDT)conversion.res
$(TESTDT)mc.res
$(TESTDT)root.res
+$(TESTDT)testtable32.res
$(TESTDT)te.res
$(TESTDT)te_IN.res
$(TESTDT)testtypes.res
+$(TESTDT)icu26_testtypes.res
+$(TESTDT)icu26e_testtypes.res
$(TESTDT)testempty.res
$(TESTDT)testaliases.res
$(TESTDT)iscii.res
@@ -32,8 +47,14 @@
$(TESTDT)test1.cnv
$(TESTDT)test3.cnv
$(TESTDT)test4.cnv
+$(TESTDT)test4x.cnv
$(TESTDT)ibm9027.cnv
$(TESTDT)idna_rules.res
+$(TESTDT)nfscsi.spp
+$(TESTDT)nfscss.spp
+$(TESTDT)nfscis.spp
+$(TESTDT)nfsmxs.spp
+$(TESTDT)nfsmxp.spp
<<
@@ -53,9 +74,41 @@
@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -p"$(TESTPKG)" -q -s"$(TESTDATA)" -d"$(TESTDATABLD)" idna_rules.txt
-$(TESTDATABLD)\$(TESTDT)test.icu : {"$(ICUTOOLS)\gentest\$(CFG)"}gentest.exe
+"$(TESTDATABLD)\$(TESTDT)test.icu" : {"$(ICUTOOLS)\gentest\$(CFG)"}gentest.exe
"$(ICUTOOLS)\gentest\$(CFG)\gentest" -d"$(TESTDATABLD)"
+# testtable32 resource file
+"$(TESTDATABLD)\testtable32.txt" : {"$(ICUTOOLS)\gentest\$(CFG)"}gentest.exe
+ "$(ICUTOOLS)\gentest\$(CFG)\gentest" -r -d"$(TESTDATABLD)"
+
+"$(TESTDATABLD)\$(TESTDT)testtable32.res": "$(TESTDATABLD)\testtable32.txt"
+ @echo Making Test Resource Bundle file for IDNA reference implementation
+ @"$(ICUTOOLS)\genrb\$(CFG)\genrb" -p"$(TESTPKG)" -q -s"$(TESTDATABLD)" -d"$(TESTDATABLD)" testtable32.txt
+
+# Targets for nfscsi.spp
+"$(TESTDATABLD)\$(TESTDT)nfscsi.spp" : {"$(ICUTOOLS)\gensprep\$(CFG)"}gensprep.exe "$(TESTDATA)\nfs4_cs_prep_ci.txt"
+ @echo Building nfscsi.spp
+ @"$(ICUTOOLS)\gensprep\$(CFG)\gensprep" -s "$(TESTDATA)" -d "$(TESTDATABLD)\\" -b nfscsi -p "$(TESTPKG)" -u 3.2.0 nfs4_cs_prep_ci.txt
+
+# Targets for nfscss.spp
+"$(TESTDATABLD)\$(TESTDT)nfscss.spp" : {"$(ICUTOOLS)\gensprep\$(CFG)"}gensprep.exe "$(TESTDATA)\nfs4_cs_prep_cs.txt"
+ @echo Building nfscss.spp
+ @"$(ICUTOOLS)\gensprep\$(CFG)\gensprep" -s "$(TESTDATA)" -d "$(TESTDATABLD)\\" -b nfscss -p "$(TESTPKG)" -u 3.2.0 nfs4_cs_prep_cs.txt
+
+# Targets for nfscis.spp
+"$(TESTDATABLD)\$(TESTDT)nfscis.spp" : {"$(ICUTOOLS)\gensprep\$(CFG)"}gensprep.exe "$(TESTDATA)\nfs4_cis_prep.txt"
+ @echo Building nfscis.spp
+ @"$(ICUTOOLS)\gensprep\$(CFG)\gensprep" -s "$(TESTDATA)" -d "$(TESTDATABLD)\\" -b nfscis -p "$(TESTPKG)" -u 3.2.0 -k -n "$(ICUTOOLS)\..\data\unidata" nfs4_cis_prep.txt
+
+# Targets for nfsmxs.spp
+"$(TESTDATABLD)\$(TESTDT)nfsmxs.spp" : {"$(ICUTOOLS)\gensprep\$(CFG)"}gensprep.exe "$(TESTDATA)\nfs4_mixed_prep_s.txt"
+ @echo Building nfsmxs.spp
+ @"$(ICUTOOLS)\gensprep\$(CFG)\gensprep" -s "$(TESTDATA)" -d "$(TESTDATABLD)\\" -b nfsmxs -p "$(TESTPKG)" -u 3.2.0 -k -n "$(ICUTOOLS)\..\data\unidata" nfs4_mixed_prep_s.txt
+
+# Targets for nfsmxp.spp
+"$(TESTDATABLD)\$(TESTDT)nfsmxp.spp" : {"$(ICUTOOLS)\gensprep\$(CFG)"}gensprep.exe "$(TESTDATA)\nfs4_mixed_prep_p.txt"
+ @echo Building nfsmxp.spp
+ @"$(ICUTOOLS)\gensprep\$(CFG)\gensprep" -s "$(TESTDATA)" -d "$(TESTDATABLD)\\" -b nfsmxp -p "$(TESTPKG)" -u 3.2.0 -k -n "$(ICUTOOLS)\..\data\unidata" nfs4_mixed_prep_p.txt
# Targets for test converter data
@@ -66,6 +119,9 @@
@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" -d"$(TESTDATABLD)" -p $(TESTPKG) $**
"$(TESTDATABLD)\$(TESTDT)test4.cnv": "$(TESTDATA)\test4.ucm"
+ @"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" -d"$(TESTDATABLD)" -p $(TESTPKG) $**
+
+"$(TESTDATABLD)\$(TESTDT)test4x.cnv": "$(TESTDATA)\test4x.ucm"
@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" -d"$(TESTDATABLD)" -p $(TESTPKG) $**
"$(TESTDATABLD)\$(TESTDT)ibm9027.cnv": "$(TESTDATA)\ibm9027.ucm"
Index: testtypes.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/testtypes.txt,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- testtypes.txt 10 Sep 2003 02:42:50 -0000 1.4
+++ testtypes.txt 6 Apr 2004 10:09:53 -0000 1.5
@@ -33,16 +33,18 @@
// genrb just includes the test as a string after converting to UTF-16
- testincludeUTF:include{ "th18057.txt" }
+ testincludeUTF:include{ "riwords.txt" }
// No unescaping is done.
testinclude:include{ "translit_rules.txt" }
// Genrb failed parsing \u0075 sequence this tests it
- CollationElements{
+ collations {
+ standard {
Version{"x01"}
Sequence{
"&'\u0075' = '\uFF55'" // LATIN SMALL LETTER U
}
+ }
}
string{ }
Index: translit_rules.txt
===================================================================
RCS file: /cvs/core/icu-sword/source/test/testdata/translit_rules.txt,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- th18057.txt DELETED ---
- Previous message: [sword-cvs] icu-sword/source/common/unicode symtable.h,NONE,1.1 usprep.h,NONE,1.1 utrace.h,NONE,1.1 brkiter.h,1.1,1.2 caniter.h,1.1,1.2 chariter.h,1.4,1.5 dbbi.h,1.1,1.2 locid.h,1.4,1.5 normlzr.h,1.5,1.6 parsepos.h,1.1,1.2 platform.h.in,1.5,1.6 pos400.h,1.3,1.4 putil.h,1.4,1.5 pwin32.h,1.5,1.6 rbbi.h,1.1,1.2 rep.h,1.4,1.5 resbund.h,1.4,1.5 schriter.h,1.4,1.5 strenum.h,1.1,1.2 ubrk.h,1.1,1.2 uchar.h,1.5,1.6 uchriter.h,1.4,1.5 uclean.h,1.4,1.5 ucnv.h,1.4,1.5 ucnv_err.h,1.4,1.5 uenum.h,1.1,1.2 uidna.h,1.1,1.2 uiter.h,1.1,1.2 uloc.h,1.4,1.5 umachine.h,1.4,1.5 umisc.h,1.3,1.4 unifilt.h,1.1,1.2 unifunct.h,1.1,1.2 unimatch.h,1.1,1.2 uniset.h,1.1,1.2 unistr.h,1.5,1.6 unorm.h,1.4,1.5 uobject.h,1.1,1.2 urename.h,1.5,1.6 ures.h,1.5,1.6 uscript.h,1.5,1.6 uset.h,1.1,1.2 usetiter.h,1.1,1.2 ustring.h,1.4,1.5 utf.h,1.4,1.5 utf16.h,1.3,1.4 utf8.h,1.4,1.5 utypes.h,1.9,1.10 uversion.h,1.5,1.6
- Next message: [sword-cvs] icu-sword/debian rules,1.5,1.6
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]