[sword-svn] r313 - trunk/modules/crossreferences
refdoc at crosswire.org
refdoc at crosswire.org
Fri Nov 5 17:12:39 MST 2010
Author: refdoc
Date: 2010-11-05 17:12:39 -0700 (Fri, 05 Nov 2010)
New Revision: 313
Modified:
trunk/modules/crossreferences/xreffix.pl
Log:
This is a complete makeover of xreffix, now using the Sword API via the Perl bindings. I have written it in a fashion which should allow adapting it easily to different texts.
Modified: trunk/modules/crossreferences/xreffix.pl
===================================================================
--- trunk/modules/crossreferences/xreffix.pl 2010-11-05 22:51:21 UTC (rev 312)
+++ trunk/modules/crossreferences/xreffix.pl 2010-11-06 00:12:39 UTC (rev 313)
@@ -1,4 +1,5 @@
#!/usr/bin/perl
+
## Creates proper OSIS references where usfm2osis has failed.
## Licensed under the standard BSD license:
@@ -40,7 +41,6 @@
use Sword;
use feature "state";
-
$version = "1.1";
$osisVersion = "2.1.1";
@@ -48,7 +48,7 @@
$rev = '$Rev: 231 $';
$mgr = new Sword::SWMgr();
-$module = $mgr->getModule('KJV');
+$module = $mgr->getModule('GERSCH2000');
if (scalar(@ARGV) < 1) {
print "xreffix.pl -- fixes crossreferences in OSIS files where usfm2osis.pl has failed. version $version\nRevision $rev ($date)\nSyntax: xreffix.pl <input filename> [-o <output-file>] [-l <xreflocale>].\n";
@@ -59,7 +59,7 @@
$outputFilename = "$ARGV[2]";
}
else {
- $outputFilename = "$ARGV[0].fixed";
+ $outputFilename = "$ARGV[0].fixed.xml";
}
if ($ARGV[1] eq "-l") {
$locale = "$ARGV[2];"
@@ -83,8 +83,13 @@
$c_chapter="1";
$c_verse="1";
+addRefs();
+readLocale();
+
foreach (@data) {
-
+
+ # The actual document locale takes precedence. Not sure if this is a good decision.
+
if (/xml:lang\=\"(.+?)\"/) {
if ($locale ne $1) {
print "This document is in the locale of ".$1."\n";
@@ -92,30 +97,130 @@
}
}
+
+ # The conversion to OSIS requires a context scope for single verse references.
+ # This needs to be always maintained and passed on.
+
if (/<div\ type\=\"book\"\ osisID=\"(.+?)\">/) {
$c_book=$1;
print "\n"."Now working on ".$c_book."\n";
}
if (/<chapter\ sID\=\".*?\.([0-9]+)\"/) {
$c_chapter=$1;
- print ".";
+ print "\n"."Now working on ".$c_book.$c_chapter."\n";
+
}
if (/<verse\ sID\=\".*?\.([0-9]+)\"/) {
$c_verse=$1;
}
- my $scope= new Sword::VerseKey;
- $scope->setText($c_book.$c_chapter.$c_verse);
- s/<note\ type=\"crossReference\">(.*?)<\/note>/"<note n=\"".note_index()."\" osisID=\"$c_book.$c_chapter.$c_verse!crossReference.".note_index()."\" osisRef=\"$c_book.$c_chapter.$c_verse\" type=\"crossReference\">".Sword::VerseKey::convertToOSIS($1, $scope)."<\/note>"/eg;
+
+ # Finally the isolated references are passed to the actual conversion routine
+
+ s/<reference>(.*?)<\/reference>/createReference($1,$c_book,$c_chapter,$c_verse)/eg;
+
+
}
+
+foreach (@data) {
+ s/<note\ type=\"crossReference\">(.*?)<\/note>/"<note n=\"".note_index()."\" osisID=\"$c_book.$c_chapter.$c_verse!crossReference.".note_index()."\" osisRef=\"$c_book.$c_chapter.$c_verse\" type=\"crossReference\">".$1."<\/note>"/eg;
+ }
+
print (OUTF @data);
close OUTF;
-sub note_index {
+####################################################################################
+# In the conversion routine the references need to get cleaned up and prepared for conversion
+
+sub createReference() {
+
+ my $ref = @_[0];
+ print "I got this here: ".$ref."\n";
+ print "this is the current scope: ".@_[1].".".@_[2].".".@_[3]."\n";
+ my $scope= new Sword::VerseKey;
+ $scope->setText(@_[1].".".@_[2].".".@_[3]);
+
+
+ # This is about changing the various separators etc. for non-English locales into English ones
+ # You need to be careful if you change any of the indicators. The order of changes currently done is for German.
+ # Look at the list given in sub readLocale.
+ # If your text is in English or marked up along English lines you will need to comment out a few sections.
+
+ $ref =~ s/$sep_cv/:/g;
+ $ref =~ s/;$ind_v\ /;\ /g;
+ $ref =~ s/^$ind_v//;
+ # $ref =~ s/$sep_l/,/g;
+ $ref =~ s/\./,/g;
+ # Sometimes xrefs have prose content apart from the actual references.
+
+ my @refs = split(/$fill_start/,$ref);
+
+ my $return='';
+ foreach (@refs) {
+
+ # I am sure this can be done more elegantly, but I have currently no clue
+ # Basically repetitive prose content in xrefs like "compare" needs to get "neutralised" prior to conversion to OSIS,
+ # but it should not get lost, so I attach it here to the return string
+
+ if (/^$fill_end/) {
+ $return = $return." ".$fill;
+ $_ =~ s/^$fill_end//;
+ }
+
+ print "I put this here in:".$_."\n";
+ $return = $return.Sword::VerseKey::convertToOSIS($_, $scope) ;
+ }
+ print "and I created that: ".$return."\n";
+
+ # After the cleansing and conversion into English standard we want to recreate the original separators in the reference prose
+
+ $return =~ s/(>.*?),(?=.*?<)/$1.$sep_l.$2/eg;
+ $return =~ s/(>.*?):(?=.*?<)/$1$sep_cv$2/g;
+
+
+ $return;
+ }
+
+sub note_index {
+
my @note = qw(a a b b c c d d e e f f g g h h i i j j k k l l m m n n o o p p q q r r s s t t u u v v w w x x y y z z );
state $i=0;
- $return = $note[$i % 52];
+ my $return = $note[$i % 52];
++$i;
$return;
}
+
+#####################################################################################
+# Edit the following subroutines for your particular project
+
+# Many locales have different indicators for book/chapter/verse separation etc. The conversion routine requires English standard separators.
+
+sub readLocale () {
+
+ $sep_bc = ' '; # separator between books and chapters
+ $sep_cv = ','; # separator between chapters and verses
+ $ind_v = 'V\.'; # indicator for single verse - unfortunately this will get lost in the conversion.
+ $sep_l = '\.'; # separator for list of chapters or verses
+
+ $fill_start ="vg"; # indicators for "compare"
+ $fill_end ="l"; # / -> reads as "vgl."
+ $fill = "vgl\."#/
+
+}
+
+# Your text might have references which are not yet marked up. Here is your chance to do so.
+
+sub addRefs () {
+
+ foreach (@data) {
+
+
+ # references included inline
+ s/\(z\.B\.\ (.*?)\)/\(z\.B\.\ <reference>$1<\/reference>\)/g;
+
+ # parallel reference subtitles
+ s/<title\ type=\"parallel\">(.*?)<\/title>/<title\ type=\"parallel\"><reference>$1<\/reference><\/title>/g;
+ }
+
+}
\ No newline at end of file
More information about the sword-cvs
mailing list