[sword-svn] r141 - trunk/modules/perlconverters
chrislit at www.crosswire.org
chrislit at www.crosswire.org
Wed Jun 11 23:06:03 MST 2008
Author: chrislit
Date: 2008-06-11 23:06:02 -0700 (Wed, 11 Jun 2008)
New Revision: 141
Modified:
trunk/modules/perlconverters/usfm2osis.pl
Log:
fixed a few bugs, improved a few things for broader USFM support
Modified: trunk/modules/perlconverters/usfm2osis.pl
===================================================================
--- trunk/modules/perlconverters/usfm2osis.pl 2008-04-03 09:46:47 UTC (rev 140)
+++ trunk/modules/perlconverters/usfm2osis.pl 2008-06-12 06:06:02 UTC (rev 141)
@@ -1,10 +1,10 @@
#!/usr/bin/perl
-## USFM to OSIS (2.0) converter
+## USFM to OSIS (2.1.1) converter
## Licensed under the standard BSD license:
-# Copyright (c) 2002,2003,2007 CrossWire Bible Society <http://www.crosswire.org/>
+# Copyright (c) 2002-2008 CrossWire Bible Society <http://www.crosswire.org/>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
@@ -39,8 +39,8 @@
#########################################################################
-$version = "1.1";
-$date = "2007-04-23";
+$version = "1.2";
+$date = "2008-05-16";
$osisVersion = "2.1.1";
%OSISbook = (
@@ -98,7 +98,7 @@
}
}
-push (@outdata, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace osisCore.$osisVersion.xsd\">\n<osisText osisRefWork=\"Bible\" xml:lang=\"en\" osisIDWork=\"$osisWork\">\n<header>\n<work osisWork=\"$osisWork\"\/>\n<\/header>\n");
+push (@outdata, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<osis xmlns=\"http://www.bibletechnologies.net/2003/OSIS/namespace\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.$osisVersion.xsd\">\n<osisText osisRefWork=\"Bible\" xml:lang=\"en\" osisIDWork=\"$osisWork\">\n<header>\n<work osisWork=\"$osisWork\"\/>\n<\/header>\n");
$tagStack = "<\/osisText><\/osis>";
$chapClose = "";
@@ -108,7 +108,7 @@
$tag = @_[0];
if ($tagStack =~ /$tag/) {
- $tagStack =~ s/(.*?$tag)//;
+ $tagStack =~ s/^(.*?$tag)//;
$taglist = $1;
$taglist =~ s/>/>\n/g;
$taglist =~ s/(<\/\w+)\s+[^>]+>/$1>/g;
@@ -126,7 +126,7 @@
}
foreach $file (@files) {
- @filedata = `uconv -f windows-1252 -t utf-8 $file`;
+ @filedata = `cat \"$file\"`;
$ollevel = 0;
$vers = 0;
@@ -159,18 +159,25 @@
### File Identification
+ $line =~ s/\\v\b\s+(\d+)(\-\d+|\s*\\v\b\s+\d+)\s*\\v\b\s+(\d+)/\\v $1\-$3/;
+ $line =~ s/\\v\b\s+(\d+)\s*\\v\b\s+(\d+\-)?(\d+)/\\v $1\-$3/;
+ $line =~ s/^\\(p[is]|mi)\b/\\p/;
+ $line =~ s/^\\li\b/\\p/; #\li isn't part of USFM, so we'll make it \p
+
# \id (book marker)
if ($line =~ /^\\id\b\s*([^ ]*)/) {
$book = $OSISbook{$1};
$chap = 0;
- if ($chapClose =~ "<verse") {
- push (@outdata, $verseClose); # close verse
- $verseClose = "";
+ if ($versClose =~ /<verse/) {
+ push (@outdata, $versClose); # close verse
+ $versClose = "";
}
- if ($chapClose =~ "<chapter") {
+# push (@outdata, closeTag("<\/div[^>]*?>")); # close section
+ if ($chapClose =~ /<chapter/) {
push (@outdata, $chapClose); # close chapter
$chapClose = "";
}
+
push (@outdata, closeTag("<\/div type=\"book\">")); #close book
if ($book eq "") {
$book = "UnknownUSFMBook";
@@ -199,6 +206,11 @@
openTag("<\/div>");
}
+ # \imt major title
+ if ($line =~ /^\\imt\b\s*(.+)/) {
+ $line = "<title>$1<\/title>";
+ }
+
# \is introduction section title
if ($line =~ /^\\is(\d*)\b\s*(.*)/) {
$level = $1;
@@ -222,13 +234,16 @@
elsif ($ollevel > $1) {
$line = "";
while ($ollevel > $1) {
- $line .= "<\/list>";
+ $line .= "<\/list><\/item>\n";
$ollevel--;
}
$line .= "<item>$2<\/item>";
}
elsif ($ollevel < $1) {
$line = "";
+ if ($ollevel != 0) {
+ $line .= "<item>";
+ }
while ($ollevel < $1) {
$line .= "<list>\n";
$ollevel++;
@@ -239,6 +254,7 @@
if (@filedata[$i+1] !~ /^\\io/) {
while ($ollevel > 0) {
$line .= "\n<\/list>";
+ if ($ollevel > 1) {$line .= "<\/item>";}
$ollevel--;
}
if ($ollevel == 0) {
@@ -267,7 +283,7 @@
push (@outdata, $versClose);
$versClose = "";
push (@outdata, closeTag("<\/p>"));
- if ($chapClose =~ "<chapter") {
+ if ($chapClose =~ /<chapter/) {
push (@outdata, $chapClose); # close previous chapter
$chapClose = "";
} else {
@@ -279,13 +295,13 @@
$line =~ s/\\c\b\s*([^ ]*)//;
}
- # \d majorSection
- if ($line =~ /^\\d\b\s*(.+)/) {
+ # \d \ms majorSection
+ if ($line =~ /^\\(ms|d)\b\s*(.+)/) {
push (@outdata, closeTag("<\/p>"));
push (@outdata, closeTag("<\/div type=\"majorSection\">"));
push (@outdata, "<div type=\"majorSection\">\n");
openTag("<\/div type=\"majorSection\">");
- $line =~ s/\\d\b\s*(.+)/<title>$1<\/title>/;
+ $line =~ s/\\(ms|d)\b\s*(.+)/<title>$2<\/title>/;
}
# \s section
@@ -300,18 +316,18 @@
}
}
- # \ss subSection
- if ($line =~ /^\\ss\b\s*(.+)/) {
- $line =~ s/\\ss\b\s*(.+)/<title>$1<\/title>/;
+ # \ss \s2 subSection
+ if ($line =~ /^\\s[s2]\b\s*(.+)/) {
+ $line =~ s/\\s[s2]\b\s*(.+)/<title>$1<\/title>/;
}
- # \sss x-subsubSection
- if ($line =~ /^\\sss\b\s*(.+)/) {
+ # \sss \s3 x-subsubSection
+ if ($line =~ /^\\s(ss|3)\b\s*(.+)/) {
push (@outdata, closeTag("<\/p>"));
push (@outdata, closeTag("<\/div type=\"x-subSubSection\">"));
push (@outdata, "<div type=\"x-subSubSection\">\n");
openTag("<\/div type=\"x-subSubSection\">");
- $line =~ s/\\sss\b\s*(.+)/<title>$1<\/title>/;
+ $line =~ s/\\s(ss|3)\b\s*(.+)/<title>$2<\/title>/;
}
# \p paragraph
@@ -386,6 +402,12 @@
$line =~ s/(<note [^>]+>)([A-Z][^a-z:]*?):/$1<catchWord>$2<\/catchWord>/g;
}
+ # \f if we STILL have notes, just change them to <note>
+ if ($line =~ /\\f\b\s*/) {
+ $line =~ s/\\f\b\s*/<note>/;
+ }
+
+
# \x crossReference
for ($j = 2; $j > 0; $j--) {
if ($line =~ /\\x\b\s*\\rf\s*([^\\]+)\\rf\*\s*/) {
@@ -466,6 +488,30 @@
# \th table heading
if ($line =~ /^\\t/) {
+ if ($line =~ /^\\tr\b\s*(\\th.*)/) {
+ $line = "$1";
+ if ($table != 1) {
+ push (@outdata, "<table>\n");
+ $table = 1;
+ }
+ $line =~ s/\\th\d?\b\s*(.+?)\s*(?=(\\th|$))/<cell role=\"label\">$1<\/cell>/g;
+ $line = "<row>$line<\/row>";
+ }
+
+ if ($line =~ /^\\tr\b\s*(\\tc.*)/) {
+ $line = $1;
+ if ($table != 1) {
+ push (@outdata, "<table>\n");
+ $table = 1;
+ }
+ $line =~ s/\\tcr?\d?\b\s*(.+?)\s*(?=(\\tc|$))/<cell>$1<\/cell>/g;
+ $line = "<row>$line<\/row>";
+ if (@filedata[$i+1] !~ /\\tr/) {
+ $line .= "<\/table>\n";
+ $table = 0;
+ }
+ }
+
if ($line =~ /^\\th1\b\s*(.*)/) {
if ($table != 1) {
push (@outdata, "<table>\n");
@@ -475,7 +521,8 @@
}
elsif ($line =~ /^\\th\d+\b\s*(.*)/) {
$line = "<cell role=\"label\">$1<\/cell>\n";
- }
+ }
+
if ($line =~ /^\\tb1\b\s*(.*)/) {
if ($table != 1) {
push (@outdata, "<table>\n");
@@ -514,11 +561,16 @@
}
}
- # \mt title
- if ($line =~ /^\\mt\b\s*(.+)/) {
+ # \mt\mt1 title
+ if ($line =~ /^\\mt[1]?\b\s*(.+)/) {
$line = "<title type=\"main\">$1<\/title>";
}
+ # \mt2 title
+ if ($line =~ /^\\mt2\b\s*(.+)/) {
+ $line = "<title type=\"continued\">$1<\/title>";
+ }
+
# \st,\st2 title
if ($line =~ /^\\st2?\b\s*(.+)/) {
$line = "<title type=\"continued\">$1<\/title>";
@@ -553,6 +605,7 @@
# remove unnecessary tags
$line =~ s/\\b\b//;
$line =~ s/\\m\b//;
+ $line =~ s/\\restore\b//;
$line =~ s/\\bq\*/<\/p><\/q>/g;
@@ -592,6 +645,8 @@
@filedata = <INF>;
close (INF);
open (OUTF, ">$outputFilename");
+
+#bubble chapter down
for ($i = 0; $i < scalar(@filedata); $i++) {
if (@filedata[$i] =~ /^<\// && @filedata[$i-1] =~ /^<chapter.+\/>/) {
$temp = @filedata[$i];
@@ -603,6 +658,7 @@
for ($i = 0; $i < scalar(@filedata); $i++) {
$fullfile .= @filedata[$i];
}
+$fullfile =~ s/<\/div>\n(<chapter eID[^>]+>)/$1\n<\/div>/mg; #swap the chapter back up one before the book closer
$q = 1;
More information about the sword-cvs
mailing list