- $2<\/item>\n";
}
if (@filedata[$i+1] !~ /^\\io/) {
while ($ollevel > 0) {
$line .= "\n<\/list>";
if ($ollevel > 1) {$line .= "<\/item>";}
$ollevel--;
}
if ($ollevel == 0) {
$line .= "\n<\/div>";
}
}
}
# \ip introduction paragraph
if ($line =~ /^\\ip\b\s*(.*)/) {
$line = "
$1<\/p>";
}
# \im introduction paragraph (flush left; the flush-left rendering is NOT IMPLEMENTED)
if ($line =~ /^\\im\b\s*(.*)/) {
$line = "
$1<\/p>";
}
# \imq introduction quotation (implemented as ordinary quotation)
if ($line =~ /^\\imq\b\s*(.*)/) {
$line = "$1<\/q>";
}
# \iq line (including \iq#), adapted from \q (see below), needs more clean-up
if ($line =~ /^\\iq/) {
if ($l != 1) {
push (@outdata, "\n");
$l = 1;
}
if ($line =~ /\\iq(\d*)$/) {
if ($1 eq "") {
$line = "\n";
}
else {
$line = "\n";
}
@filedata[$i+1] .= "<\/l>";
if (@filedata[$i+2] !~ /\\iq(?!t)/) {
@filedata[$i+1] .= "\n<\/lg>";
$l = 0;
}
}
else {
$line =~ s/\\iq\b\s*(.+)/$1<\/l>/;
$line =~ s/\\iq(\d+)\b\s*(.+)/$2<\/l>/;
if (@filedata[$i+1] !~ /\\iq(?![ta])/) {
$line .= "\n<\/lg>";
$l = 0;
}
}
}
# \ib introduction blank line
if ($line =~ /^\\ib$/) {
$line = "
";
}
# \ior..\ior* introduction reference
# These ranges are usually massive. I quite likely would prefer to wipe the tag and leave the references as plain text
# $line =~ s/\\ior( .*?)\\ior\*/$1/g;
$line =~ s/\\ior( .*?)\\ior\*/$1<\/reference>/g;
# \ie introduction end (discard)
if ($line =~ /^\\ie\b/) {
$line = "";
}
### Titles, Headings, and Labels (elsewhere?)--Markers Supported: \d, \ms#, \s#, \mt#, \st#, \mr, \r, \sp, \rq..\rq*
#### Markers Not Yet Supported: \mte#, \sr
# \ms majorSection
if ($line =~ /^\\ms\d?\b\s*(.*)/) {
push (@outdata, closeTag("<\/p>"));
push (@outdata, closeTag("<\/div type=\"majorSection\">"));
push (@outdata, "\n");
openTag("<\/div type=\"majorSection\">");
$line =~ s/\\ms\d?\b\s*(.+)/
$1<\/title>/;
$line =~ s/\\ms\d?\b\s*//;
}
# \d canonical title
if ($line =~ /^\\d\b\s*(.+)?(\\d\*)?/) {
push (@outdata, closeTag("<\/p>"));
$line =~ s/\\d\b\s*(.+)/$1<\/title>/;
}
# \s \s1 section (From Chapters and Verses)
if ($line =~ /^\\s1?\b\s*(.*)/) {
push (@outdata, closeTag("<\/p>"));
push (@outdata, closeTag("<\/div type=\"section\">"));
push (@outdata, "\n");
openTag("<\/div type=\"section\">");
$line =~ s/\\s1?\b\s*(.+)/
$1<\/title>/;
$line =~ s/\\s1?\b\s*//;
if ($line =~ /HEBREW TITLE/) {
$line =~ s///;
}
}
# \ss \s2 subSection (From Chapters and Verses)
if ($line =~ /^\\s[s2]\b\s*(.*)/) {
push (@outdata, closeTag("<\/p>"));
push (@outdata, closeTag("<\/div type=\"subSection\">"));
push (@outdata, "\n");
openTag("<\/div type=\"subSection\">");
$line =~ s/\\s[s2]\b\s*(.+)/
$1<\/title>/;
$line =~ s/\\s[s2]\b\s*//;
}
# \sss \s3 x-subSubSection (From Chapters and Verses)
# This will also handle deeper levels (4+) of subsections.
if ($line =~ /^\\s(ss|\d+)\b\s*(.*)/) {
my $ssLvl = $1;
my $ssType = "";
while ($ssLvl > 2) {
$ssLvl--;
$ssType .= "Sub";
}
$ssType = "x-sub" . $ssType . "Section";
push (@outdata, closeTag("<\/p>"));
push (@outdata, closeTag("<\/div type=\"$ssType\">"));
push (@outdata, "\n");
openTag("<\/div type=\"$ssType\">");
$line =~ s/\\s(ss|\d+)\b\s*(.+)/
$2<\/title>/;
$line =~ s/\\s(ss|\d+)\b\s*//;
}
# \mt\mt1 title
if ($line =~ /^\\mt[1234]?\b\s*(.+)/) {
$line = "$1<\/title>";
}
# \mt2 title
if ($line =~ /^\\mt2\b\s*(.+)/) {
$line = "$1<\/title>";
}
# \st,\st2 title
if ($line =~ /^\\st2?\b\s*(.+)/) {
$line = "$1<\/title>";
}
# \st3 title
if ($line =~ /^\\st3\b\s*(.+)/) {
$line = "$1<\/title>";
}
# \mr major section reference range (sub title)
if ($line =~ /^\\mr\b\s*(.+)/) {
$line = "$1<\/title>";
}
# \r parallel title
if ($line =~ /^\\r\b\s*(.+)/) {
$line = "$1<\/title>";
}
# \sp speaker
if ($line =~ /^\\sp\b\s*(.+)/) {
$line = "$1<\/speaker>";
}
# \rq..\rq* inline reference
$line =~ s/\\rq( .*?)\\rq\*/$1<\/reference>/g;
### Chapters and Verses--Markers Supported: \c, \v, \vp...\vp*, \cl
#### Markers Not Yet Supported: \ca...\ca*, \cp, \cd, \va...\va*
# \c chapter
if ($line =~ /^\\c\b\s*([^ ]*)/) {
if ($1 ne "") {
$chap = $1;
}
else {
$chap++;
}
push (@outdata, $versClose);
$versClose = "";
if ($moduleType eq "bible") {
push (@outdata, closeTag("<\/p>"))
}
if ($chapClose =~ /")); # close introduction div
}
push (@outdata, "\n");
$chapClose = "\n";
$line =~ s/\\c\b\s*([^ ]*)//;
}
# \cl chapter label
if ($line =~ /^\\cl\b\s*(.*)/) {
$line = "$1<\/title>";
}
# \v verse
if ($line =~ /^\\v\b\s*(\d[^\\ ]*)?/) {
# Take the verse label that follows \v; a bare \v continues from the previous verse.
if ($1 ne "") {
$vers = $1;
}
else {
$vers++;
}
# Emit whatever closes the previous verse before starting the new one.
push (@outdata, $versClose);
# Was the bareword `false`, which Perl (without strict) turns into the TRUE string
# "false". Every visible reader of $divOpen tests plain truthiness, so 0/1 is used.
$divOpen = 0;
$versClose = "";
if ($vers =~ /(\d+[^\\\- ]*)\-(\d+[^\\ ]*)/) {
# Bridged verses ("3-5", "3a-4b"): the osisID lists the first label, every whole
# verse number strictly between the two ends, and the last label.
$vF = $1;
$vT = $2;
# Numeric parts of the two ends; both labels start with a digit by construction.
($vFn) = $vF =~ /^(\d+)/;
($vTn) = $vT =~ /^(\d+)/;
$osisID = "$book.$chap.$vF";
if ($vTn > $vFn && $vFn > 0) {
for ($j = $vFn + 1; $j < $vTn; $j++) {
$osisID .=" $book.$chap.$j";
}
}
$osisID .= " $book.$chap.$vT";
}
else {
$osisID = "$book.$chap.$vers";
}
if ($moduleType eq "bible") {
# NOTE(review): $osisID is computed above but does not appear in the strings pushed
# below; the opening/closing verse markup looks lost from these literals -- confirm.
push (@outdata, "\n");
$versClose = "\n";
$line =~ s/\\v\b\s*(\d[^\\ ]*)? *//;
}
elsif ($moduleType eq "comment") {
# NOTE(review): the return value of closeTag is discarded here, while the other
# visible call sites push it onto @outdata -- confirm this is intended.
closeTag("<\/p>");
push (@outdata, "\n");
$versClose = "<\/p>\n<\/div>\n";
$line =~ s/\\v\b\s*(\d[^\\ ]*)? *//;
# Was the bareword `true`; see the note on $divOpen above.
$divOpen = 1;
}
else { print "usfm2osis.pl supports only the module types \"bible\" and \"comment\" \n";
exit 1;
}
}
# \vp...\vp* published verse numbers
$line =~ s/\\vp\*\s*//g;
$line =~ s/\\vp\b\s*(\d+[a-z]?|[a-z])\s*/
$1<\/seg>/g;
### Paragraphs--Markers Supported: \p, \pc, \mi2, \b, \m, \nb, \cls
#### Markers Not Yet Supported: \pmo, \pm, \pmc, \pmr, \pi#, \mi, \li#, \pr, \ph#
# Hack to solve an issue in a module that used for linebreaks in the usfm files--may be commented out (not USFM 2.1)
# $line =~ s/\\lb\*/
/g;
# \p paragraph (From Chapters and Verses)
if ($line =~ /^\\p\b\s*/) {
if ((($moduleType eq "comment") && ($divOpen))||($moduleType eq "bible")){
push (@outdata, closeTag("<\/p>"));
}
push (@outdata, "\n");
openTag("<\/p>");
$line =~ s/\\p\b\s*//;
}
# \pc paragraph centered (From Chapters and Verses)
if ($line =~ /^\\pc\b\s*/) {
if ((($moduleType eq "comment") && ($divOpen))||($moduleType eq "bible")){
push (@outdata, closeTag("<\/p>"));
}
push (@outdata, "
\n");
openTag("<\/p>");
$line =~ s/\\pc\b\s*//;
}
# \mi2 paragraph flush left, no indentation
if ($line =~ /^\\mi2\b\s*/) {
if ((($moduleType eq "comment") && ($divOpen))||($moduleType eq "bible")){
push (@outdata, closeTag("<\/p>"));
}
push (@outdata, "
\n");
openTag("<\/p>");
$line =~ s/\\mi2\b\s*//;
}
# \cls paragraph (From Chapters and Verses)
if ($line =~ /^\\cls\b\s*/) {
if ((($moduleType eq "comment") && ($divOpen))||($moduleType eq "bible")){
push (@outdata, closeTag("<\/closer>"));
}
push (@outdata, "\n");
openTag("<\/closer>");
$line =~ s/\\cls\b\s*//;
}
# \b
$line =~ s/\\b\b/
/;
# \m
$line =~ s/\\m\b//;
# \nb
$line =~ s/\\nb\b//;
### Poetry--Markers Supported: \q#, \qs...\qs*, \qr, \qc, \qac...\qac*, \qa, \qm#
#### Markers Not Yet Supported: [none]
# \qt...\qt*, OT quotation (handle early)
$line =~ s/\\qt\b\s*(.*?)\\qt\*/$1<\/seg>/g;
# \qa, acrostic heading
$line =~ s/^\\qa\b\s*(.*)/$1<\/title>/g;
# \qac...\qac*, acrostic character style (used within a line)
$line =~ s/\\qac\b\s*(.*?)\\qac\*/$1<\/hi>/g;
# \q line (including \q#, \qr, \qc, and \qs...\qs*)
if ($line =~ /^\\q/) {
if ($l != 1) {
push (@outdata, "\n");
$l = 1;
}
if ($line =~ /\\qm?(c|r|\d*)$/) {
if ($1 eq "") {
$line = "\n";
}
elsif ($1 eq "c") {
$line = "";
}
elsif ($1 eq "r") {
$line = "";
}
else {
$line = "\n";
}
@filedata[$i+1] .= "<\/l>";
if (@filedata[$i+2] !~ /\\q(?!t)/) {
@filedata[$i+1] .= "\n<\/lg>";
$l = 0;
}
}
else {
$line =~ s/\\q\b\s*(.+)/$1<\/l>/;
$line =~ s/\\qm?(\d+)\b\s*(.+)/$2<\/l>/;
$line =~ s/\\qc\b\s*(.+)/$1<\/l>/;
$line =~ s/\\qr\b\s*(.+)/$1<\/l>/;
$line =~ s/\\qs\b\s*(.+?)\s*\\qs\*/$1<\/l>/;
if (@filedata[$i+1] !~ /\\q(?![ta])/) {
$line .= "\n<\/lg>";
$l = 0;
}
}
$line =~ s/\s*\\qs\b\s*(.+?)\s*\\qs\*/<\/l>\n$1/;
}
$line =~ s/\s*\\qs\b\s*(.+?)\s*\\qs\*\s*/$1<\/l><\/lg>/;
### Tables--Markers Supported: \tr, \th#, \tc#, \tcr#
####Markers Not Yet Supported: \thr#
# \th table heading
if ($line =~ /^\\t/) {
if ($line =~ /^\\tr\b\s*(\\th.*)/) {
$line = "$1";
if ($table != 1) {
push (@outdata, "\n");
$table = 1;
}
$line =~ s/\\th\d?\b\s*(.+?)\s*(?=(\\th|$))/$1<\/cell>/g;
$line = "$line<\/row>";
}
if ($line =~ /^\\tr\b\s*(\\tc.*)/) {
$line = $1;
if ($table != 1) {
push (@outdata, "\n");
$table = 1;
}
$line =~ s/\\tcr?\d?\b\s*(.+?)\s*(?=(\\tc|$))/$1<\/cell>/g;
$line = "$line<\/row>";
if (@filedata[$i+1] !~ /\\tr/) {
$line .= "<\/table>\n";
$table = 0;
}
}
if ($line =~ /^\\th1\b\s*(.*)/) {
if ($table != 1) {
push (@outdata, "\n");
$table = 1;
}
$line = "$1<\/cell>\n";
}
elsif ($line =~ /^\\th\d+\b\s*(.*)/) {
$line = "$1<\/cell>\n";
}
if ($line =~ /^\\tb1\b\s*(.*)/) {
if ($table != 1) {
push (@outdata, "\n");
$table = 1;
}
else {
push (@outdata, "<\/row>");
}
$line = "$1<\/cell>\n";
if (@filedata[$i+1] !~ /\\tb/) {
$line .= "<\/row><\/table>\n";
$table = 0;
}
}
elsif ($line =~ /^\\tb\d+\b\s*(.*)/) {
$line = "$1<\/cell>\n";
if (@filedata[$i+1] !~ /\\tb/) {
$line .= "<\/row><\/table>\n";
$table = 0;
}
}
}
# Build a book-qualified reference from a USFM origin reference such as "3:16" or "3:16-18".
#
# Argument: $_[0] - chapter/verse text; a trailing ":" or "." is ignored and a verse
#           range may be written with "-".
# Reads:    the package global $book, which is prefixed to every reference.
# Writes:   the package global $ref. It is left global on purpose: the rest of the
#           script is not visible from here and may read it -- TODO confirm, then
#           make it lexical.
# Returns:  "$book.chapter.verse", or "$book.chapter.from-$book.chapter.to" for a
#           verse range within one chapter.
# NOTE(review): a range that crosses chapters ("3:16-4:2") is not handled specially;
#           the right-hand side is treated as a verse of the first chapter.
sub parseRef {
# $_[0], not the one-element slice @_[0].
$ref = $_[0];
# Drop a trailing ":" or "." together with any trailing whitespace.
$ref =~ s/[:\.]\s*$//;
# Chapter and verse are joined with "." in the output, ":" in the input.
$ref =~ s/:/\./g;
$ref = "$book.$ref";
# Plain range "c.v-w": repeat book and chapter on the right-hand side.
# "[^\,]*" (was "+") so that a single-digit first verse such as "3.1-5" is expanded too.
$ref =~ s/(\d+)\.(\d[^\,]*)\-(\d+)/$1.$2-$book.$1.$3/;
# Irregular range (repeated dashes, or a space after the dash). The look-ahead skips a
# right-hand side that already carries the book prefix; without it a book ID that
# starts with a digit (e.g. "1Cor") was expanded a second time, producing
# "1Cor.3.16-1Cor.3.1Cor.3.18".
$ref =~ s/(\d+)\.(\d[^\-]*)\-+\s*(?!\Q$book\E\.)(\d.+)/$1.$2\-$book.$1.$3/;
return $ref;
}
### Footnotes--Markers Supported: \fk, \fq, \f...\f*, \fv, \ft, \fqa, \fr
####Markers Not Yet Supported: \fe...\fe*, \fl, \fp, \fdc...\fdc*, \fm...\fm*
# Convert the USFM markers inside one footnote span.
#
# Argument: $_[0] - the footnote text; the visible caller passes a whole
#           "\f ... \f*" span.
# Globals:  works in $note; sets $sourceVal (via parseRef) and increments $nFN once
#           per \fr marker.
# Returns:  the span with the \f-family markers removed or replaced.
#
# Handled markers are temporarily replaced by the placeholder "\fX". That keeps a
# "\f..." boundary in place for the open-ended "(?=\\f)" rules further down; all
# placeholders are stripped at the end. The order of the substitutions matters.
sub footnoteHandler {
$note = @_[0];
# NOTE(review): interpolating $note into an otherwise empty string is a no-op; it looks
# like wrapper markup around $note was lost from this literal -- confirm against upstream.
$note = "$note";
# \fk Catch Words
# Three forms per marker: explicitly closed (\fk ... \fk*), open-ended (runs up to the
# next \f-family marker), and finally any stray closer.
$note =~ s/\\fk\s(.+?)(\s*)\\fk\*/\\fX$1<\/catchWord>\\fX$2/g;
$note =~ s/\\fk\s(.+?)(\s*)(?=\\f)/\\fX$1<\/catchWord>$2\\fX/g;
$note =~ s/\\fk\*/\\fX/g;
# \fq Quotations in Footnotes
# CCL--I don't know the difference, aside from length, between catch words and quotations in footnotes. It may vary by document.
$note =~ s/\\fq\s(.+?)(\s*)\\fq\*/\\fX$1<\/catchWord>\\fX$2/g;
$note =~ s/\\fq\s(.+?)(\s*)(?=\\f)/\\fX$1<\/catchWord>$2\\fX/g;
$note =~ s/\\fq\*/\\fX/g;
# \fqa Alternate translations in Footnotes
$note =~ s/\\fqa\s(.+?)\\fqa\*/\\fX$1<\/rdg>\\fX/g;
$note =~ s/\\fqa\s(.+?)(?=\\f)/\\fX$1<\/rdg>\\fX/g;
$note =~ s/\\fqa\*/\\fX/g;
# \fv Footnote verse number
$note =~ s/\\fv\s(.+?)\\fv\*/\\fX$1<\/reference>\\fX/g;
$note =~ s/\\fv\s*(\d+)\b\s*(?=\\f)/\\fX$1<\/reference>\\fX/g;
$note =~ s/\\fv\*/\\fX/g;
# \fr Footnote origin reference (the verse where the fn appears)
# Each pass converts one origin reference and removes its \fr marker, so the loop ends
# once no \fr followed by another \f-family marker is left.
while ($note =~ /\\fr\s*(.+?)\s*(?=\\f)/) {
$sourceVal = parseRef($1);
$nFN++;
# $note =~ s/\\fr\s*(.+?)\s*(?=\\f)//;
# Only the marker and the whitespace after it are removed; the reference text stays.
$note =~ s/\\fr\s*//;
# NOTE(review): with an empty pattern Perl reuses the last successfully matched
# pattern, which here should be the \fr pattern of the previous statement, so this
# deletes one further \fr marker if there is one. $sourceVal and $nFN are not used
# anywhere in the visible code; the original pattern and replacement look lost --
# confirm against upstream.
$note =~ s///;
}
# \ft Footnote text
$note =~ s/\\ft\s//g;
$note =~ s/\\ft\*//g;
# \f* Footnote closer
# No /g: only the first closer is removed.
$note =~ s/\s*\\f\*//;
# \f Footnote opener
# Also accepts \fe, and removes one following non-space character (presumably the
# footnote caller, e.g. "+") with the whitespace around it.
$note =~ s/\\fe?\b\s*([^\s]\s*)?//;
# \fX was inserted above to mark former locations of various already-handled markers, which can now be removed
$note =~ s/\\fX//g;
return $note;
}
$line =~ s/(\\f\b.+?\\f\*)/footnoteHandler($1)/eg;
### Crossreferences--Markers Supported: \x + \xo...\x*, \xk, \xq, \xt
#### Markers Not Yet Supported: \xdc...\xdc*
# Convert the USFM markers inside one cross-reference span.
#
# Argument: $_[0] - the cross-reference text; the visible caller passes a whole
#           "\x ... \x*" span.
# Globals:  works in $xref; sets $sourceVal (via parseRef) and increments $xFN once
#           per \xo marker.
# Returns:  the span with the \x-family markers removed or replaced.
#
# The order of the substitutions matters: the open-ended rules rely on a later
# "\x..." marker still being present as the right-hand boundary.
sub xrefHandler {
$xref = @_[0];
# NOTE(review): interpolating $xref into an otherwise empty string is a no-op; it looks
# like wrapper markup around $xref was lost from this literal -- confirm against upstream.
$xref = "$xref";
# \xk Catch Words
# Three forms per marker: explicitly closed (\xk ... \xk*), open-ended (runs up to the
# next \x-family marker), and finally any stray closer.
$xref =~ s/\\xk\s(.+?)(\s*)\\xk\*/$1<\/catchWord>$2/g;
$xref =~ s/\\xk\s(.+?)(\s*)(?=\\x)/$1<\/catchWord>$2/g;
$xref =~ s/\\xk\*//g;
# \xq Quotations in cross references
# CCL--I don't know the difference, aside from length, between catch words and quotations in cross references. It may vary by document.
$xref =~ s/\\xq\s(.+?)(\s*)\\xq\*/$1<\/catchWord>$2/g;
$xref =~ s/\\xq\s(.+?)(\s*)(?=\\x)/$1<\/catchWord>$2/g;
$xref =~ s/\\xq\*//g;
# \xo Cross-reference origin reference (the verse where the cross reference appears)
# Each pass converts one origin reference and removes its \xo marker, so the loop ends
# once no \xo followed by another \x-family marker is left.
while ($xref =~ /\\xo\s*(.+?)\s*(?=\\x)/) {
$sourceVal = parseRef($1);
$xFN++;
# $xref =~ s/\\xo\s*(.+?)\s*(?=\\x)//;
# Only the marker and the whitespace after it are removed; the reference text stays.
$xref =~ s/\\xo\s*//;
# NOTE(review): with an empty pattern Perl reuses the last successfully matched
# pattern, which here should be the \xo pattern of the previous statement, so this
# deletes one further \xo marker if there is one. $sourceVal and $xFN are not used
# anywhere in the visible code; the original pattern and replacement look lost --
# confirm against upstream.
$xref =~ s///;
}
# \xt Crossref itself
$xref =~ s/\\xt\s(.+?)\\xt\*/$1<\/reference>/g;
$xref =~ s/\\xt\s(.+?)(?=\\x)/$1<\/reference>/g;
$xref =~ s/\\xt\*//g;
# \x* Cross-reference closer
# No /g: only the first closer is removed.
$xref =~ s/\\x\*//;
# \x Cross-reference opener
# Also removes one following non-space character (presumably the caller, e.g. "+")
# with the whitespace around it.
$xref =~ s/\\x\b\s*([^\s]\s*)?//;
return $xref;
}
$line =~ s/(\\x\b.+?\\x\*)/xrefHandler($1)/eg;
# crossReference osisRef=""
$line =~ s/([^<]+)<\/reference>/$1<\/reference>/g;
$line =~ s/osisRef="\s/osisRef="\s/g;
$line =~ s/\s">/">/g;
$line =~ s/([^<]+)<\/reference>; $4<\/reference>; ([^<]+)<\/reference>, $4<\/reference>, $1<\/transChange>/g;
# \it...\it*, italic text
$line =~ s/\\it\b\s*(.*?)\\it\*/$1<\/hi>/g;
# \bd...\bd*, bold text
$line =~ s/\\bd\b\s*(.*?)\\bd\*/$1<\/hi>/g;
# \bk...\bk*, book name in text
$line =~ s/\\bk\b\s*(.*?)\\bk\*/$1<\/hi>/g;
# \sc...\sc*, small-caps character style (used within a line)
$line =~ s/\\sc\b\s*(.*?)\\sc\*/$1<\/hi>/g;
# \nd...\nd*, Divine Name
$line =~ s/\\nd\b\s*(.*?)\\nd\*/$1<\/divineName>/g;
# \pn...\pn*, Proper name
$line =~ s/\\pn\b\s*(.*?)\\pn\*/$1<\/name>/g;
# \tl...\tl*, Foreign Language (treated here merely as transliterated text)
$line =~ s/\\tl\b\s*(.*?)\\tl\*/$1<\/hi>/g;
# \sls...\sls*, Text from alternative text source
$line =~ s/\\sls\b\s*(.*?)\\sls\*/$1<\/hi>/g;
# \add...\add*, text added for translation purposes
$line =~ s/\\add\b\s*(.*?)\\add\*/$1<\/transChange>/g;
# \wj...\wj*, Words of Jesus
# Return the milestone ID attribute for a \wj (words of Jesus) marker.
#
# Argument: $_[0] - "" (or undef) for an opening marker, anything else for a closing
#           one; presumably the "(\*?)" capture of the \wj substitution -- verify
#           against the caller.
# Globals:  an opening marker increments the package counter $wj; a closing marker
#           reuses the current value, so it pairs with the most recent opener.
# Returns:  'sID="q.N" ' for an opener, 'eID="q.N" ' for a closer (note the trailing
#           space), where N is the value of $wj.
sub wjCount {
# $_[0], not the one-element slice @_[0].
my $closer = $_[0];
if (!defined($closer) || $closer eq "") {
$wj++;
return "sID=\"q.$wj\" ";
}
return "eID=\"q.$wj\" ";
}
#$line =~ s/\\wj\b\s*(.*?)\\wj\*/$1<\/q>/g;
$line =~ s/\\wj\b(\*?)\s*/""/eg;
# \pb, page break
$line =~ s/\\pb\b//g;
### Other (probably non-standard) items
### Markers Supported: \zelastic
# \zelastic, elastic height marker for typesetting
$line =~ s/\\zelastic\b//g;
$line =~ s/_/ /g;
### End USFM 2.1 Items
if ($line !~ /^\s*$/) {
push (@outdata, "$line\n");
}
}
}
if ($versClose =~ /"));
# Flush the converted lines collected in @outdata to the already-open OUTF handle.
# Every entry is tidied in place first; blank entries are skipped.
#@outdata[$i] =~ s/---/―/g; # m-dash
#@outdata[$i] =~ s/--/—/g; # n-dash
for my $entry (@outdata) {
# An sID/eID attribute that holds a space-separated list keeps only its first ID
# (first such attribute in the entry only, as there is no /g).
$entry =~ s/([es]ID=\"[^\" ]+) [^\"]*\"/$1\"/;
# Entries that are empty or whitespace-only are not written.
next if $entry =~ /^\s*$/;
# Collapse runs of CR/LF into one "\n" and make sure the entry ends with one.
$entry =~ s/[\r\n]+/\n/g;
$entry =~ s/\n?$/\n/;
print OUTF $entry;
}
# Buffered write errors only surface at close, and the file is re-read right after
# this, so a failure here must not go unnoticed.
close (OUTF) or die "Could not finish writing $outputFilename: $!\n";
# Cleanup pass: read back the file that was just written, move some lines relative to
# their neighbours, and write the result over the same file.
# NOTE(review): several statements below look truncated (as if markup inside the
# literals was stripped) and do not compile as written; each one is flagged in place.
print "Doing some cleanup.\n";
# NOTE(review): the result of this open (and of the one for writing below) is not checked.
open (INF, "<:utf8", "$outputFilename");
# NOTE(review): the right-hand side is empty -- presumably a readline on INF was lost
# here; as written this is a syntax error.
@filedata = ;
close (INF);
open (OUTF, ">:utf8", "$outputFilename");
# bubble chapter down
for ($i = 0; $i < scalar(@filedata); $i++) {
# NOTE(review): the second pattern is just /^/, which matches every line, so any line
# starting with "</" is swapped with its predecessor. The comment above suggests the
# predecessor was meant to be a chapter line; the pattern looks truncated -- confirm.
# For $i == 0 the index $i-1 is -1, i.e. the last line of the file.
if (@filedata[$i] =~ /^<\// && @filedata[$i-1] =~ /^/) {
$temp = @filedata[$i];
@filedata[$i] = @filedata[$i-1];
@filedata[$i-1] = $temp;
# Together with the loop's $i++ this steps back one line, so the line that was just
# moved up is compared with its new predecessor on the next iteration.
$i -= 2;
}
}
# bubble verse end up
# CCL--this may require further attention, but works for the present
for ($i = 0; $i < scalar(@filedata); $i++) {
# NOTE(review): the pattern has an unclosed "(" and does not compile; the alternatives
# it listed (and probably a second test on the current line) look lost -- confirm.
if (@filedata[$i-1] =~ /^(/) {
$temp = @filedata[$i];
@filedata[$i] = @filedata[$i-1];
@filedata[$i-1] = $temp;
# Same step-back as in the loop above.
$i -= 2;
}
}
# Join all lines into one string so that the remaining fixes can match across lines.
for ($i = 0; $i < scalar(@filedata); $i++) {
$fullfile .= @filedata[$i];
}
# NOTE(review): in both substitutions below the group "(]+>)" has an unmatched "]"; the
# start of that group (presumably an opening tag followed by a negated character
# class) looks lost. The trailing comments describe the intended effect.
$fullfile =~ s/<\/osisText>\n<\/osis>\n(]+>)/$1\n<\/osisText>\n<\/osis>/mg; #swap the chapter back up one before the osisText closer
$fullfile =~ s/<\/div>\n(]+>)/$1\n<\/div>/mg; #swap the chapter back up one before the book closer
# Disabled experiment: quotation tagging. Kept for reference only.
#print "Tagging quotations.\n";
#$q = 1;
#$fullfile =~ s/\$([^\%]+?)\%/"" . $1 . ""/eg;
#$fullfile =~ s/\$/""/eg;
#$q = 1;
#while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/) {
# $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#(([^\@\#]+?\@[^\@\#]+?\#)+[^\@\#]+?\#)/$1 . "" . $2 . "" . $3/eg;
#}
#while ($fullfile =~ /(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/) {
# $fullfile =~ s/(\@[^\@\#]+?)\@([^\@\#]+?)\#([^\@\#]+?\#)/$1 . "" . $2 . "" . $3/eg;
#}
#$fullfile =~ s/\@([^\#]+?)\#/"" . $1 . ""/eg;
#$fullfile =~ s/\@/""/eg;
#$fullfile =~ s/\^/""/eg;
# Write the reordered text back over the output file.
# NOTE(review): neither print nor close is checked, so a failed write goes unreported.
print OUTF $fullfile;
close (OUTF);
print "All done! OSIS file: $outputFilename\n";
| | | | | |