[sword-svn] r249 - trunk/locales

chrislit at crosswire.org chrislit at crosswire.org
Sat Nov 14 15:52:59 MST 2009


Author: chrislit
Date: 2009-11-14 15:52:59 -0700 (Sat, 14 Nov 2009)
New Revision: 249

Modified:
   trunk/locales/updateFiles.pl
Log:
Changed most calls to mv to cp so that the originally downloaded files are left in place; wget -N can then compare timestamps against them and skip unnecessary retrievals of whole files.
Added retrieval of data from our wiki, with the output written to localized.txt.
Corrected the regexes after SIL's page update.


Modified: trunk/locales/updateFiles.pl
===================================================================
--- trunk/locales/updateFiles.pl	2009-11-14 09:51:45 UTC (rev 248)
+++ trunk/locales/updateFiles.pl	2009-11-14 22:52:59 UTC (rev 249)
@@ -1,34 +1,53 @@
 #!/usr/bin/perl
 
-#This script calls wget, unzip, and mv, so you'll need to have those binaries installed.
+#This script calls wget, unzip, cp, and mv, so you'll need to have those binaries installed.
 
-`wget -N  http://www.iana.org/assignments/language-subtag-registry`;
-`mv language-subtag-registry language-subtag-registry.txt`;
-`wget -N http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt`;
-`wget -N http://www.loc.gov/standards/iso639-5/iso639-5.pipe.txt`;
-`wget -N http://unicode.org/iso15924/iso15924.txt.zip`;
-`wget -N http://www.iso.org/iso/iso3166_en_code_lists.txt`;
-`wget -N http://www.iso.org/iso/iso3166_fr_code_lists.txt`;
+use utf8;
 
+`wget -N  http://www.iana.org/assignments/language-subtag-registry -owget.log`;
+`cp -f language-subtag-registry language-subtag-registry.txt`;
+`wget -N http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt -owget.log`;
+`wget -N http://www.loc.gov/standards/iso639-5/iso639-5.pipe.txt -owget.log`;
+`wget -N http://unicode.org/iso15924/iso15924.txt.zip -owget.log`;
+`wget -N http://www.iso.org/iso/iso3166_en_code_lists.txt -owget.log`;
+`wget -N http://www.iso.org/iso/iso3166_fr_code_lists.txt -owget.log`;
+
 $ret = `unzip -o iso15924.txt.zip`;
 $ret =~ /(iso15924-utf.+)/;
 `mv -f \"$1\" \"iso15924-utf8.txt\"`;
 `rm iso15924.txt.zip`;
 
-`wget -N http://www.sil.org/iso639-3/download.asp`;
+`wget -N http://www.sil.org/iso639-3/download.asp -owget.log`;
 open DL, "download.asp";
 while (<DL>) {$downloadasp .= $_;}
 close (DL);
 `rm download.asp`;
 
-$downloadasp =~ /Download ISO 639-3 code set <a HREF=\"([^\"]+)\">UTF-8/;
-`wget -N "http://www.sil.org/iso639-3/$1"`;
-`mv -f \"$1\" \"iso-639-3.tab\"`;
+$downloadasp =~ /<a HREF=\"(iso-639-3_[0-9]+\.tab)\">/;
+`wget -N "http://www.sil.org/iso639-3/$1" -owget.log`;
+`cp -f \"$1\" \"iso-639-3.tab\"`;
 
-$downloadasp =~ /Download ISO 639-3 Language Names Index <a HREF=\"([^\"]+)\">UTF-8/;
-`wget -N "http://www.sil.org/iso639-3/$1"`;
-`mv -f \"$1\" \"iso-639-3_Name_Index.tab\"`;
+$downloadasp =~ /<a HREF=\"(iso-639-3_Name_Index_[0-9]+\.tab)\">/;
+`wget -N "http://www.sil.org/iso639-3/$1" -owget.log`;
+`cp -f \"$1\" \"iso-639-3_Name_Index.tab\"`;
 
-$downloadasp =~ /Download <a HREF=\"([^\"]+)\">ISO 639-3 code retirement mappings/;
-`wget -N "http://www.sil.org/iso639-3/$1"`;
-`mv -f \"$1\" \"iso-639-3_Retirements.tab\"`;
+$downloadasp =~ /<a HREF=\"(iso-639-3_Retirements_[0-9]+\.tab)\">/;
+`wget -N "http://www.sil.org/iso639-3/$1" -owget.log`;
+`cp -f \"$1\" \"iso-639-3_Retirements.tab\"`;
+
+`wget "http://www.crosswire.org/wiki/Special:Export/Localized_Language_Names" -Olocalized.txt -owget.log`;
+
+open LOC, "<:utf8", "localized.txt";
+while (<LOC>) {
+    push @content, $_;
+}
+
+open LOC, ">:utf8", "localized.txt";
+
+foreach $line (@content) {
+    if ($line =~ /^\|\s*([a-zA-Z\-]+)\s*\|\|\s*([^\|]+)\s+(\|\|\s*(.+))?/) {
+#	if ($4 !~ /dead/) { #turn this on to remove langs tagged as dead
+	    print LOC "$1\t$2\n";
+#	}
+    }
+}




More information about the sword-cvs mailing list