[sword-svn] r169 - in trunk/source/data/translit/crosswire: . cldr
chrislit at crosswire.org
chrislit at crosswire.org
Fri Feb 20 20:47:57 MST 2009
Author: chrislit
Date: 2009-02-20 20:47:57 -0700 (Fri, 20 Feb 2009)
New Revision: 169
Modified:
trunk/source/data/translit/crosswire/cldr/cldr2icu.pl
trunk/source/data/translit/crosswire/icu2cldr.pl
Log:
added NFC normalization [note: the diff below actually calls NFD(), i.e. canonical decomposition, not NFC()]
fixed a space (\s) over-generalization issue
fixed a mis-ordered regex substitution
Modified: trunk/source/data/translit/crosswire/cldr/cldr2icu.pl
===================================================================
--- trunk/source/data/translit/crosswire/cldr/cldr2icu.pl 2009-02-21 03:16:40 UTC (rev 168)
+++ trunk/source/data/translit/crosswire/cldr/cldr2icu.pl 2009-02-21 03:47:57 UTC (rev 169)
@@ -2,6 +2,7 @@
use utf8;
use Encode;
+use Unicode::Normalize;
opendir (DIR, ".");
@xlits = grep /\.xml$/, readdir (DIR);
@@ -28,12 +29,12 @@
while (<INF>) {
$line = $_;
$line =~ s/^\x{FEFF}//;
- $line =~ s/\s*$//;
- $line =~ s/^\s*//;
+ $line =~ s/[\t\r\n ]*$//;
+ $line =~ s/^[\t\r\n ]*//;
$line =~ s/<[^<>]+?>//g;
- if ($line !~ /^\s*$/) {
+ if ($line !~ /^[\t\r\n ]*$/) {
$line =~ s/↔/<>/g;
$line =~ s/→/>/g;
@@ -44,7 +45,9 @@
$line =~ s/&amp;/&/g;
$line =~ s/\\u([0-9A-Fa-f]{1,6})/chr(hex($1))/eg;
-
+
+ $line = NFD($line);
+
print OUTF "$line\n";
}
}
Modified: trunk/source/data/translit/crosswire/icu2cldr.pl
===================================================================
--- trunk/source/data/translit/crosswire/icu2cldr.pl 2009-02-21 03:16:40 UTC (rev 168)
+++ trunk/source/data/translit/crosswire/icu2cldr.pl 2009-02-21 03:47:57 UTC (rev 169)
@@ -2,6 +2,7 @@
use utf8;
use Encode;
+use Unicode::Normalize;
opendir (DIR, ".");
@xlits = grep /\.txt$/, readdir (DIR);
@@ -31,18 +32,20 @@
while (<INF>) {
$line = $_;
$line =~ s/^\x{FEFF}//;
- $line =~ s/\s*$//;
- $line =~ s/^\s*//;
- if ($line !~ /^\s*$/) {
+ $line =~ s/[\t\r\n ]*$//;
+ $line =~ s/^[\t\r\n ]*//;
+ if ($line !~ /^[\t\r\n ]*$/) {
$line =~ s/([^\\])<>/$1↔/g;
$line =~ s/([^\\])>/$1→/g;
$line =~ s/([^\\])</$1←/g;
+ $line =~ s/&/&amp;/g;
$line =~ s/</&lt;/g;
$line =~ s/>/&gt;/g;
- $line =~ s/&/&amp;/g;
+ $line = NFD($line);
+
if ($line =~ /^\#/) {
print OUTF "<comment>$line<\/comment>\n";
}
More information about the sword-cvs
mailing list