[sword-svn] r142 - trunk/modules/perlconverters
chrislit at www.crosswire.org
chrislit at www.crosswire.org
Thu Jun 12 02:04:39 MST 2008
Author: chrislit
Date: 2008-06-12 02:04:38 -0700 (Thu, 12 Jun 2008)
New Revision: 142
Modified:
trunk/modules/perlconverters/usfm2osis.pl
Log:
added improved encoding support (removing need for uconv)
added some status messages
Modified: trunk/modules/perlconverters/usfm2osis.pl
===================================================================
--- trunk/modules/perlconverters/usfm2osis.pl 2008-06-12 06:06:02 UTC (rev 141)
+++ trunk/modules/perlconverters/usfm2osis.pl 2008-06-12 09:04:38 UTC (rev 142)
@@ -39,8 +39,8 @@
#########################################################################
-$version = "1.2";
-$date = "2008-05-16";
+$version = "1.3";
+$date = "2008-06-12";
$osisVersion = "2.1.1";
%OSISbook = (
@@ -67,37 +67,63 @@
"1MA" => "1Macc", "2MA" => "2Macc", "3MA" => "3Macc", "4MA" => "4Macc",
"1ES" => "1Esd", "2ES" => "2Esd", "MAN" => "PrMan",
# Following this is just an uneducated guess
-"PS2" => "Ps151", "ODA" => "Odes", "PSS" => "PssSol", "JSA" => "Josh",
+ "PS2" => "Ps151", "ODA" => "Odes", "PSS" => "PssSol", "JSA" => "Josh",
"JSB" => "Josh", "TBS" => "Tob", "SST" => "Sus", "DNT" => "Dan",
"BLT" => "Bel", "ADE" => "AddEsth"
);
+use Encode;
+@encodingList = Encode->encodings(":all");
+foreach $enc (@encodingList) {
+ $encodings .= "$enc, ";
+}
+$encodings =~ s/\, $//;
+
+
if (scalar(@ARGV) < 2) {
- print "usfm2osis.pl -- USFM to OSIS $osisVersion converter version $version ($date)\nSyntax: usfm2osis.pl <osisWork> [-o OSIS-file] <USFM filenames|wildcard>\n";
+ print "usfm2osis.pl -- USFM to OSIS $osisVersion converter version $version ($date)\nSyntax: usfm2osis.pl <osisWork> [-o OSIS-file] [-e USFM encoding] <USFM filenames|wildcard>\n\n";
+ print "Supported encodings include:\n\t$encodings\n\n";
+ print "If the encoding is omitted, utf8 is the default value.\n";
exit (-1);
}
$osisWork = $ARGV[0];
-if ($ARGV[1] eq "-o") {
- $outputFilename = "$ARGV[2];"
+$nextarg = 1;
+
+if ($ARGV[$nextarg] eq "-o") {
+ $outputFilename = "$ARGV[$nextarg+1]";
+ $nextarg += 2;
}
else {
$outputFilename = "$osisWork.osis.xml";
}
-open (OUTF, ">$outputFilename") or die "Could not open file $ARGV[2] for writing.";
+open (OUTF, , ">:utf8", "$outputFilename") or die "Could not open file $ARGV[2] for writing.";
-if ($ARGV[1] eq "-o") {
- for ($i = 3; $i < scalar(@ARGV); $i++) {
- push(@files, $ARGV[$i]);
- }
+if ($ARGV[$nextarg] eq "-e") {
+ $inputEncoding = "$ARGV[$nextarg+1]";
+ $nextarg += 2;
}
else {
- for ($i = 1; $i < scalar(@ARGV); $i++) {
- push(@files, $ARGV[$i]);
+ $inputEncoding = "utf8";
+}
+$encFound = 0;
+foreach $enc (@encodingList) {
+ if ($enc eq $inputEncoding) {
+ $encFound = 1;
}
}
+if ($encFound == 0) {
+ die "Encoding $inputEncoding not supported.\nSupported encodings include:\n\t$encodings\n";
+}
+else {
+ print "Encoding \"$inputEncoding\" is supported.\n"
+}
+for (; $nextarg < scalar(@ARGV); $nextarg++) {
+ push(@files, $ARGV[$nextarg]);
+}
+
push (@outdata, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.$osisVersion.xsd\">\n<osisText osisRefWork=\"Bible\" xml:lang=\"en\" osisIDWork=\"$osisWork\">\n<header>\n<work osisWork=\"$osisWork\"\/>\n<\/header>\n");
$tagStack = "<\/osisText><\/osis>";
@@ -126,7 +152,15 @@
}
foreach $file (@files) {
- @filedata = `cat \"$file\"`;
+ print "Processing $file.\n";
+ open (SFM, "$file");
+ my @filedata = "";
+ while (<SFM>) {
+ my $sfline;
+ $sfline = decode($inputEncoding, $_);
+ push (@filedata, $sfline);
+ }
+ close (SFM);
$ollevel = 0;
$vers = 0;
@@ -619,8 +653,6 @@
push (@outdata, "$line\n");
}
}
-
- close (INF);
}
push (@outdata, closeTag("<\/osis>"));
@@ -638,9 +670,10 @@
print OUTF @outdata[$i];
}
}
-
close (OUTF);
+print "Doing some cleanup.\n";
+
open (INF, "$outputFilename");
@filedata = <INF>;
close (INF);
@@ -660,6 +693,8 @@
}
$fullfile =~ s/<\/div>\n(<chapter eID[^>]+>)/$1\n<\/div>/mg; #swap the chapter back up one before the book closer
+print "Tagging quotations.\n";
+
$q = 1;
$fullfile =~ s/\$([^\%]+?)\%/"<q level=\"2\" sID=\"q2." . $q . "\"\/>" . $1 . "<q level=\"2\" eID=\"q2." . $q++ . "\"\/>"/eg;
@@ -680,7 +715,7 @@
$fullfile =~ s/\^/"<q level=\"1\" eID=\"q1." . $q++ . ".false\"\/>"/eg;
-
-
print OUTF $fullfile;
close (OUTF);
+
+print "All done! OSIS file: $outputFilename\n";
More information about the sword-cvs
mailing list