[sword-svn] r209 - trunk/modules/perlconverters
chrislit at crosswire.org
chrislit at crosswire.org
Thu Jun 18 15:37:02 MST 2009
Author: chrislit
Date: 2009-06-18 15:37:02 -0700 (Thu, 18 Jun 2009)
New Revision: 209
Modified:
trunk/modules/perlconverters/usfm2osis.pl
Log:
removed trailing whitespace from file
Modified: trunk/modules/perlconverters/usfm2osis.pl
===================================================================
--- trunk/modules/perlconverters/usfm2osis.pl 2009-06-18 22:31:19 UTC (rev 208)
+++ trunk/modules/perlconverters/usfm2osis.pl 2009-06-18 22:37:02 UTC (rev 209)
@@ -6,11 +6,11 @@
# Copyright (c) 2002-2008 CrossWire Bible Society <http://www.crosswire.org/>
# All rights reserved.
-#
+#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
-#
+#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
@@ -21,7 +21,7 @@
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written
# permission.
-#
+#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
@@ -44,7 +44,7 @@
$date = "2009-02-08";
# Sets the version of OSIS used in the OSIS header
$osisVersion = "2.1.1";
-# Stores the USFM Version
+# Stores the USFM Version
$usfmVersion = "2.1"; # The USFM reference document can be found at http://confluence.ubs-icap.org/display/USFM/Home;jsessionid=97071C5C1E562036A1CAF4FF77147565 (as of 2008-07-07)
# This is the hash which maps the conversion of USFM book abbreviations to OSIS book abbreviations. ***I would like to add the ability to access an external file to provide options for other languages. In other words, in preparing a USFM file for conversion, a separate file could be created which could be used to map the conversion of abbreviated book names in other languages to OSIS. This would be especially useful for cross-references, but I haven't figured out how to do it yet.
@@ -172,7 +172,7 @@
push (@filedata, $sfline);
}
close (SFM);
-
+
$ollevel = 0;
$vers = 0;
$chap = 0;
@@ -183,13 +183,13 @@
# Creates array for the attribute "n" in cross-references
@nCR = (a .. z);
# Sets the initial value for the attribute "n" in cross-references.
- $nCR = @nCR [0];
-
+ $nCR = @nCR [0];
+
#encoding stuff
for ($i = 0; $i < scalar(@filedata); $i++) {
$line = @filedata[$i];
$line =~ s/[\r\n]//g;
-
+
### Basic XML entity encoding
$line =~ s/&(?![a-zA-Z0-9])/&amp;/g;
$line =~ s/<< ?/\@/g;
@@ -240,7 +240,7 @@
openTag("<\/div type=\"book\">");
$line = "";
}
-
+
# \h (running header--discard)
if ($line =~ /^\\h\b/) {
$line = "";
@@ -265,7 +265,7 @@
if ($line =~ /^\\toc\d\b/) {
$line = "";
}
-
+
### Introduction--Markers Supported: \imt#, \is#, \iot, \io#, \ip
#### Markers Not Yet Supported: \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili, \ior...\ior*, \iex, \imte, \ie
@@ -275,7 +275,7 @@
# openTag("<\/div>");
# }
- # \imt major title
+ # \imt major title
if ($line =~ /^\\imt\b\s*(.+)/) {
$line = "<div type=\"introduction\">\n<title>$1<\/title>";
openTag("<\/div>");
@@ -291,12 +291,12 @@
$line = "<div type=\"section\"><title>$2<\/title>";
openTag("<\/div>");
}
-
+
# \iot introduction outline title
if ($line =~ /^\\iot\b\s*(.*)/) {
$line = "<div type=\"outline\">\n<title>$1<\/title>";
}
-
+
# \io\d+ introduction outline item
if ($line =~ /^\\io(\d+)\b\s*(.*)/) {
if ($ollevel == $1) {
@@ -321,7 +321,7 @@
}
$line .= "<item>$2<\/item>\n";
}
-
+
if (@filedata[$i+1] !~ /^\\io/) {
while ($ollevel > 0) {
$line .= "\n<\/list>";
@@ -340,9 +340,9 @@
}
### Titles, Headings, and Labels (elsewhere?)--Markers Supported: \d, \ms#, \s#, \mt#, \r, \sp
- #### Markers Not Yet Supported: \mte#, \mr, \sr, \rq...\rq*
-
- # \d \ms majorSection
+ #### Markers Not Yet Supported: \mte#, \mr, \sr, \rq...\rq*
+
+ # \d \ms majorSection
if ($line =~ /^\\(ms|d)\b\s*(.+)/) {
push (@outdata, closeTag("<\/p>"));
push (@outdata, closeTag("<\/div type=\"majorSection\">"));
@@ -482,7 +482,7 @@
#### Markers Not Yet Supported: \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \cls, \li#, \pc, \pr, \ph#, \b
# Hack to solve an issue in a module that used <R> for linebreaks in the usfm files--may be commented out (not USFM 2.1)
- $line =~ s/\\lb\*/<lb \/>/g;
+ $line =~ s/\\lb\*/<lb \/>/g;
# \p paragraph (From Chapters and Verses)
if ($line =~ /^\\p\b\s*/) {
@@ -509,17 +509,17 @@
}
# \b
- $line =~ s/\\b\b//;
+ $line =~ s/\\b\b//;
# \m
- $line =~ s/\\m\b//;
+ $line =~ s/\\m\b//;
# \nb
- $line =~ s/\\nb\b//;
+ $line =~ s/\\nb\b//;
### Poetry--Markers Supported: \q#, \qs...\qs*, \qc, \qm#
#### Markers Not Yet Supported: \qr, \qa, \qac...\qac*, \b
# \qt...\qt*, OT quotation (handle early)
- $line =~ s/\\qt\b\s*(.*?)\\qt\*/<seg type="otPassage">$1<\/seg>/g;
+ $line =~ s/\\qt\b\s*(.*?)\\qt\*/<seg type="otPassage">$1<\/seg>/g;
# \q line
if ($line =~ /^\\q/) {
@@ -553,7 +553,7 @@
}
}
}
-
+
# \qs...\qs*, Selah
$line =~ s/\\qs\b\s*([^\\]+)\\qs\*/<l type="selah"> $1<\/l>/;
@@ -570,7 +570,7 @@
}
$line =~ s/\\th\d?\b\s*(.+?)\s*(?=(\\th|$))/<cell role=\"label\">$1<\/cell>/g;
$line = "<row>$line<\/row>";
- }
+ }
if ($line =~ /^\\tr\b\s*(\\tc.*)/) {
$line = $1;
@@ -592,7 +592,7 @@
$table = 1;
}
$line = "<row><cell role=\"label\">$1<\/cell>\n";
- }
+ }
elsif ($line =~ /^\\th\d+\b\s*(.*)/) {
$line = "<cell role=\"label\">$1<\/cell>\n";
}
@@ -610,16 +610,16 @@
$line .= "<\/row><\/table>\n";
$table = 0;
}
- }
+ }
elsif ($line =~ /^\\tb\d+\b\s*(.*)/) {
$line = "<cell>$1<\/cell>\n";
if (@filedata[$i+1] !~ /\\tb/) {
$line .= "<\/row><\/table>\n";
$table = 0;
}
- }
+ }
}
-
+
sub parseRef {
$ref = @_[0];
@@ -633,17 +633,17 @@
}
### Footnotes--Markers Supported: \fk, \fq, \f...\f*, \fv, \fqa
- ####Markers Not Yet Supported: \fe...\fe*, \fr, \fl, \fp, \ft, \fdc...\fdc*, \fm...\fm*
-
+ ####Markers Not Yet Supported: \fe...\fe*, \fr, \fl, \fp, \ft, \fdc...\fdc*, \fm...\fm*
+
sub footnoteHandler {
$note = @_[0];
$note = "<note>$note</note>";
-
+
# \fk Catch Words
$note =~ s/\\fk\s(.+?)\\fk\*/<catchWord>$1<\/catchWord>/g;
$note =~ s/\\fk\s(.+?)(?=\\f)/<catchWord>$1<\/catchWord>/g;
$note =~ s/\\fk\*//g;
-
+
# \fq Quotations in Footnotes
# CCL--I don't know the difference, aside from length, between catch words and quotations in footnotes. It may vary by document.
$note =~ s/\\fq\s(.+?)\\fq\*/<catchWord>$1<\/catchWord>/g;
@@ -657,18 +657,18 @@
# \fv Footnote verse number
$note =~ s/\\fv\s(.+?)\\fv\*/<reference osisID=\"$book.$chap.$1\">$1<\/reference>/g;
- $note =~ s/\\fv\s*(\d+)\b\s*(?=\\f)/<reference osisID=\"$book.$chap.$1\">$1<\/reference>/g;
+ $note =~ s/\\fv\s*(\d+)\b\s*(?=\\f)/<reference osisID=\"$book.$chap.$1\">$1<\/reference>/g;
$note =~ s/\\fv\*//g;
-
+
# \fr Footnote origin reference (the verse where the fn appears)
while ($note =~ /\\fr\s*(.+?)\s*(?=\\f)/) {
$sourceVal = parseRef($1);
- $nFN++;
+ $nFN++;
# $note =~ s/\\fr\s*(.+?)\s*(?=\\f)//;
$note =~ s/\\fr\s*//;
$note =~ s/<note>/<note n="$nFN">/;
}
-
+
# \ft Footnote text
$note =~ s/\\ft\s//g;
$note =~ s/\\ft\*//g;
@@ -678,23 +678,23 @@
# \f Footnote opener
$note =~ s/\\f\b\s*([^\s]\s*)?//;
-
+
return $note;
}
-
+
$line =~ s/(\\f\b.+?\\f\*)/footnoteHandler($1)/eg;
-
+
### Crossreferences--Markers Supported: \x + \xo...\x*, \xk, \xq, \xt
- #### Markers Not Yet Supported: \xdc...\xdc*
+ #### Markers Not Yet Supported: \xdc...\xdc*
sub xrefHandler {
$xref = @_[0];
$xref = "<note type=\"crossReference\">$xref</note>";
-
+
# \xk Catch Words
$xref =~ s/\\xk\s(.+?)\\xk\*/<catchWord>$1<\/catchWord>/g;
$xref =~ s/\\xk\s(.+?)(?=\\x)/<catchWord>$1<\/catchWord>/g;
$xref =~ s/\\xk\*//g;
-
+
# \xq Quotations in Footnotes
# CCL--I don't know the difference, aside from length, between catch words and quotations in footnotes. It may vary by document.
$xref =~ s/\\xq\s(.+?)\\xq\*/<catchWord>$1<\/catchWord>/g;
@@ -704,7 +704,7 @@
# \xo Footnote origin reference (the verse where the fn appears)
while ($xref =~ /\\xo\s*(.+?)\s*(?=\\x)/) {
$sourceVal = parseRef($1);
- $xFN++;
+ $xFN++;
# $xref =~ s/\\xo\s*(.+?)\s*(?=\\x)//;
$xref =~ s/\\xo\s*//;
$xref =~ s/<note type=\"crossReference\">/<note type=\"crossReference\" n="$xFN">/;
@@ -720,19 +720,19 @@
# \x Footnote opener
$xref =~ s/\\x\b\s*([^\s]\s*)?//;
-
+
return $xref;
}
-
+
$line =~ s/(\\x\b.+?\\x\*)/xrefHandler($1)/eg;
-
+
# crossReference osisRef=""
$line =~ s/<reference osisRef="">([^<]+)<\/reference>/<reference osisRef="$1">$1<\/reference>/g;
$line =~ s/osisRef="\s/osisRef="\s/g;
$line =~ s/\s">/">/g;
- $line =~ s/<reference osisRef="([^\s\"]+)\s/<reference osisRef="$1\./g; # Changes space after book name to a period
-
+ $line =~ s/<reference osisRef="([^\s\"]+)\s/<reference osisRef="$1\./g; # Changes space after book name to a period
+
$line =~ s/<reference osisRef="([^\"]+):([^\"]+)"/<reference osisRef="$1\.$2"/g; # Gen 1:1
$line =~ s/<reference osisRef="([^\.\"]+)\.(\d+)\.(\d+)-(\d+)"/<reference osisRef="$1\.$2\.$3-$1\.$2\.$4"/g; # Gen 1:1-2
$line =~ s/<reference osisRef="([^\.\"]+).(\d+):(\d+)-(\d+).(\d+)"/<reference osisRef="$1\.$2\.$3-$1\.$4\.$5"/g; # Gen 1:1-2:3
@@ -744,19 +744,19 @@
#### Markers Not Yet Supported: Special Text: \add...\add*, \bk...\bk*, \dc...\dc*, \k...\k*, \lit, \ord...\ord*, \sig...\sig*, \sls...\sls*, \wj...\wj*; Character Styling: \em...\em*, \bd...\bd*, \bdit...\bdit*, \no...\no*, \sc...\sc*; Spacing and Breaks: !$, //, \pb; Special Features: \fig...\fig*, \ndx...\ndx*, \pro...\pro*, \w...\w*, \wg...\wg*, \wh...\wh*
# \it...\it*, italic text
- $line =~ s/\\it\b\s*(.*?)\\it\*/<hi type=\"italic\">$1<\/hi>/g;
+ $line =~ s/\\it\b\s*(.*?)\\it\*/<hi type=\"italic\">$1<\/hi>/g;
# \nd...\nd*, Divine Name
- $line =~ s/\\nd\b\s*(.*?)\\nd\*/<divineName>$1<\/divineName>/g;
+ $line =~ s/\\nd\b\s*(.*?)\\nd\*/<divineName>$1<\/divineName>/g;
# \pn...\pn*, Proper name
- $line =~ s/\\pn\b\s*(.*?)\\pn\*/<name>$1<\/name>/g;
+ $line =~ s/\\pn\b\s*(.*?)\\pn\*/<name>$1<\/name>/g;
# \tl...\tl*, Foreign Language (treated here merely as transliterated text)
- $line =~ s/\\tl\b\s*(.*?)\\tl\*/<hi type="italic">$1<\/hi>/g;
+ $line =~ s/\\tl\b\s*(.*?)\\tl\*/<hi type="italic">$1<\/hi>/g;
# \add...\add*, text added for translation purposes
- $line =~ s/\\add\b\s*(.*?)\\add\*/<transChange type=\"added\">$1<\/transChange>/g;
+ $line =~ s/\\add\b\s*(.*?)\\add\*/<transChange type=\"added\">$1<\/transChange>/g;
$line =~ s/_/ /g;
More information about the sword-cvs mailing list