[sword-svn] r305 - trunk/modules/portuguese
refdoc at crosswire.org
refdoc at crosswire.org
Thu Sep 23 18:18:39 MST 2010
Author: refdoc
Date: 2010-09-23 18:18:39 -0700 (Thu, 23 Sep 2010)
New Revision: 305
Modified:
trunk/modules/portuguese/preface.xsl
trunk/modules/portuguese/text.xsl
trunk/modules/portuguese/transform.pl
Log:
more improvements to reading the xml files. Better USFM
Modified: trunk/modules/portuguese/preface.xsl
===================================================================
--- trunk/modules/portuguese/preface.xsl 2010-09-18 01:39:08 UTC (rev 304)
+++ trunk/modules/portuguese/preface.xsl 2010-09-24 01:18:39 UTC (rev 305)
@@ -8,7 +8,7 @@
<xsl:template match="page"><xsl:apply-templates/></xsl:template>
<xsl:template match="Intro_footer"><xsl:apply-templates/></xsl:template>
-<xsl:template match="intro_para">
+<xsl:template match="Intro_para">
\ip <xsl:value-of select="."/></xsl:template>
<xsl:template match="Intro_title">
@@ -22,9 +22,11 @@
\p
</xsl:template>
+<xsl:template match="bookname2">\hi<xsl:value-of select="."/>\hi*</xsl:template>
+
<xsl:template match="Lords_Name">\nd <xsl:value-of select="."/> \nd*</xsl:template>
+<xsl:template match="title"/>
-
<xsl:template match="img">\fig <xsl:value-of select="@src"/>|<xsl:value-of select="@alt"/> \fig*</xsl:template>
<xsl:strip-space elements="*"/>
Modified: trunk/modules/portuguese/text.xsl
===================================================================
--- trunk/modules/portuguese/text.xsl 2010-09-18 01:39:08 UTC (rev 304)
+++ trunk/modules/portuguese/text.xsl 2010-09-24 01:18:39 UTC (rev 305)
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
- <xsl:output method="text"/>
+<xsl:output method="text"/>
<xsl:template match="/"><xsl:apply-templates/></xsl:template>
<xsl:template match="page"><xsl:apply-templates/></xsl:template>
@@ -31,14 +31,14 @@
<xsl:template match="Lords_Name">\nd <xsl:value-of select="."/> \nd*</xsl:template>
-<xsl:template match="ref_text"></xsl:template>
-<xsl:template match="ref_key"></xsl:template>
-<xsl:template match="ref_no"></xsl:template>
-<xsl:template match="refverse_no"></xsl:template>
-<xsl:template match="refchapter_no"></xsl:template>
-<xsl:template match="font"></xsl:template>
-<xsl:template match="page_footer"></xsl:template>
-<xsl:template match="title"></xsl:template>
+<xsl:template match="ref_text"/>
+<xsl:template match="ref_key"/>
+<xsl:template match="ref_no"/>
+<xsl:template match="refverse_no"/>
+<xsl:template match="refchapter_no"/>
+<xsl:template match="font"/>
+<xsl:template match="page_footer"/>
+<xsl:template match="title"/>
@@ -46,6 +46,4 @@
<xsl:strip-space elements="*"/>
-</xsl:stylesheet>
-
-
+</xsl:stylesheet>
\ No newline at end of file
Modified: trunk/modules/portuguese/transform.pl
===================================================================
--- trunk/modules/portuguese/transform.pl 2010-09-18 01:39:08 UTC (rev 304)
+++ trunk/modules/portuguese/transform.pl 2010-09-24 01:18:39 UTC (rev 305)
@@ -2,20 +2,102 @@
use XML::LibXSLT;
use XML::LibXML;
+use utf8;
+
+
my @files=`ls -1 *.xml`;
+my %books = qw(
+01GEN.xml GEN
+02EXO.xml EXO
+03LEV.xml LEV
+04NUM.xml NUM
+05DEU.xml DEU
+06JOS.xml JOS
+07JUDG.xml JDG
+08RUT.xml RUT
+091SAM.xml 1SA
+102SAM.xml 2SA
+111KGS.xml 1KI
+122KGS.xml 2KI
+131CHR.xml 1CH
+142CHR.xml 2CH
+15ESRA.xml EZR
+16NEH.xml NEH
+17TOB.xml TOB
+18JUDIT.xml JDT
+19EST.xml EST
+201MAK.xml 1MA
+212MAK.xml 2MA
+22JOB.xml JOB
+23PSA.xml PSA
+24PRO.xml PRO
+25ECL.xml ECC
+26SONG.xml SNG
+27WIS.xml WIS
+28SIR.xml SIR
+29ISA.xml ISA
+30JER.xml JER
+31LAM.xml LAM
+32BAR.xml BAR
+33EZE.xml EZK
+34DAN.xml DAN
+35HOS.xml HOS
+36JOEL.xml JOL
+37AMOS.xml AMO
+38OBA.xml OBA
+39JONAS.xml JON
+40MIC.xml MIC
+41NAH.xml NAM
+42HAB.xml HAB
+43ZEPH.xml ZEP
+44HAG.xml HAG
+45HAB.xml ZEC
+46MAL.xml MAL
+47MAT.xml MAT
+48MRK.xml MRK
+49LUK.xml LUK
+50JHN.xml JHN
+51ACTS.xml ACT
+52ROM.xml ROM
+531COR.xml 1CO
+542COR.xml 2CO
+55GAL.xml GAL
+56EPH.xml EPH
+57PHIL.xml PHP
+58COL.xml COL
+591THES.xml 1TH
+602THES.xml 2TH
+611TIM.xml 1TI
+622TIM.xml 2TI
+63TIT.xml TIT
+64PHLM.xml PHM
+65HEB.xml HEB
+66JAM.xml JAS
+671PET.xml 1PE
+682PET.xml 2PE
+691JHN.xml 1JN
+702JHN.xml 2JN
+713JHN.xml 3JN
+72JUDE.xml JUD
+73REV.xml REV
+);
+
+
foreach (@files){
my @lines;
- my @text;
+ my $text;
my $tag;
-
+ my $preface;
+ my @preface;
chop;
open TEXT, ">>$_.text.xml";
open USFM, ">>$_.text.sfm";
open PREFACE, ">>$_.preface.xml";
+ open PREFACEUSFM, ">>$_.preface.sfm";
chomp(@lines=`cat $_`);
@@ -24,18 +106,39 @@
foreach (@lines) {
s/(size=\"20\"\ face=\".*?\-Bold\"\ color=\"\#6D6E70\")/class=\"chapter\"\ $1/g;
+ s/(size=\"20\"\ face=\".*?\-BoldItalic\"\ color=\"\#EC008C\")/class=\"chapter\"\ $1/g; # Deuterocanonical Chapters in Esther and Daniel
+ s/(size=\"19\"\ face=\".*\-Bold\"\ color=\"\#6D6E70\")/class=\"chapter\" $1/g; # Psalsm
}
-
+
SPLIT: foreach (@lines) {
if (/chapter/) {
- print (PREFACE "</page></pdf2xml>");
- $text='<?xml version="1.0" encoding="utf-8" ?><pdf2xml><page>';
+ push (@preface, "</page></pdf2xml>");
+ $text='<?xml version="1.0"?><pdf2xml><page>';
last SPLIT;
}
else {
- s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"intro_para\"\ $1/g;
- print (PREFACE $_."\n");
+ s/(size=\"6\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"bookname2\"\ $1/g;
+ s/(size=\"5\"\ face=\".*?Helvetica\-Bold\"\ color=\"\#231F20\")/class=\"Lords_Name\"\ $1/g;
+ s/(size=\"5\"\ face=\".*?Helvetica\"\ color=\"\#231F20\")/class=\"Lords_Name\"\ $1/g;
+ s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"Intro_title\"\ $1/g;
+ s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"Intro_para\"\ $1/g;
+ s/(size=\"8\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"Intro_para\"\ $1/g;
+ s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ $1/g;
+ s/(size=\"14\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ $1/g;
+ s/<text.*?>/<text>\ $1/g;
+ s/(size=\"8\"\ face=\".*?\-BoldItalic\" color=\"\#231F20\")/class=\"Intro_title_2\"\ $1/g;
+ s/(size=\"7\"\ face=\".*?\-BoldItalic\"\ color=\"\#231F20\")/class=\"Intro_title_ref"\ $1/g;
+ s/(size=\"7\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"reference\"\ $1/g;
+ s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#231F20\")/class=\"Intro_outline\" $1/g;
+ s/(size=\"7\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"Verse_Range\" $1/g;
+ s/(size=\"7\"\ face=\".*?\+Helvetica\"\ color=\"\#231F20\")/class=\"Intro_footer\" $1/g;
+ s/(size=\"7\"\ face=\".*?\+Helvetica-Bold\"\ color=\"\#231F20\")/class=\"Image_title\" $1/g;
+ s/(size=\"43\"\ face=\".*?\-Italic\"\ color=\"\#6D6E70\")/class=\"Intro_initial\"$1/g;
+ s/(size=\"11\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"Intro_title\" $1/g;
+ s/(size=\"6\"\ face=\".*?\-Oblique\"\ color=\"\#231F20\")/class=\"Image_ref\" $1/g;
+ s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ \ $1/g;
+ push( @preface, $_."\n");
$_="";
}
}
@@ -46,6 +149,7 @@
s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ \ $1/g;
s/(size=\"4\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"verse_no\"\ \ $1/g;
s/(size=\"8\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"maintext\"\ \ $1/g;
+ s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"maintext\"\ \ $1/g;
s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"section_title\"\ $1/g;
s/(size=\"7\"\ face=\".*?\-Bold\"\ color=\"\#231F20\")/class=\"refverse_no\"\ $1/g;
s/(size=\"9\"\ face=\".*?\-Bold\"\ color=\"\#231F20\">)/class=\"refchapter_no\"\ $1/g;
@@ -53,7 +157,6 @@
s/(size=\"7\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"ref_text\"\ $1/g;
s/(size=\"7\"\ face\=\".*?\+Helvetica\"\ color=\"\#231F20\")/class=\"page_footer\"\ $1/g;
s/(size=\"6\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"Lords_Name\"\ $1/g;
- # s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"Intro_title\"\ $1/g;
s/<text.*?>/<text>\ $1/g;
s/(size=\"7\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"ref_key\"\ $1/g;
s/(size=\"6\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"bookname2\"\ $1/g;
@@ -84,25 +187,88 @@
$text =~ s/(S)<\/maintext>\n\s*<Lords_Name>\s*(ENHOR)<\/Lords_Name>\n\s*<maintext>/<Lords_Name>$1$2<\/Lords_Name>/g;
$text =~ s/(<verse_no>.*?<\/verse_no>)\n\s*(<maintext>.*?<\/maintext>)\n\s*?:(<verse_no>)/<verse>$1$2<\/verse>\n<verse_no>/g;
+
+
+ foreach (@preface) {
+
+
+
+ s/<font\ class=\"(.*?)\".*?>/<$1>/;
+ $tag = $1;
+ s/<\/font/"<\/".$tag/e;
+ s/<\/>/<\/font>/;
+ }
+
+	$preface = join ("",@preface);
+
+
+ $preface =~ s/\s+/\ /g;
+ $preface =~ s/<(|\/)text>//g;
+ $preface =~ s/(.)<\/Intro_para>\s*<bookname2>(.*?)<\/bookname2>/<bookname2>$1$2<\/bookname2><\/Intro_para>/g;
+ $preface =~ s/(S|D)\s*<\/Intro_para>\s*\n*\s*<Lords_Name>\s*(ENHOR|EUS)<\/Lords_Name>\s*\n*\s*<Intro_para>/<Lords_Name>$1$2<\/Lords_Name>/g;
+ # $preface =~ s/(D)<\/Intro_para>\s*\n*\s*<Lords_Name>\s*(EUS)<\/Lords_Name>\s*\n*\s*<Intro_para>/<Lords_Name>$1$2<\/Lords_Name>/g;
+ $preface =~ s/(S|D)\s*<Lords_Name>\s*(ENHOR|EUS)<\/Lords_Name>/<Lords_Name>$1$2<\/Lords_Name>/g;
+ # $preface =~ s/(D)\s*<Lords_Name>\s*(EUS)<\/Lords_Name>/<Lords_Name>$1$2<\/Lords_Name>/g;
+ $preface =~ s/<\/Intro_para>\s*\n*\s*<Intro_para>//g;
+ $preface =~ s/<\/Intro_title_2>\s*<Intro_para>(.*?)<\/Intro_para>/\ $1<\/Intro_title_2>/g;
+ $preface =~ s/<\/Intro_outline>\s*<Intro_title_2>\s*(.*?)<\/Intro_title_2>/$1<\/Intro_outline>/g;
+ $preface =~ s/\/>\s*<\/Intro_footer>\s*<Image_title>(.*?)<\/Image_title>\s*?<Intro_footer>(.*?)<\/Intro_footer>/\ alt=\"$1$2\"\/><\/Intro_footer>/g;
+ # $preface =~ s/png\"\/>\s*<\/Intro_para>\s*<Image_title>(.*?)<\/Image_title>\s*?<Intro_footer>(.*?)<\/Intro_footer>\s*?<Image_ref>(.*?)<\/Image_ref>/png\"\ alt=\"$1$2\"\ ref=\"$3\"\/><\/Intro_para>/g;
+ $preface =~ s/<Intro_initial>(.*?)<\/Intro_initial>\s<Intro_para>/<Intro_para>$1\ /g;
+ $preface =~ s/\s+/\ /g;
+ $preface =~ s/-\ //g;
+ $preface =~ s/<Intro/\n<Intro/g;
+
+
+
# create an instance of XSL::XSLT processor
print TEXT $text;
- close text;
-
+ close TEXT;
+ print PREFACE $preface;
+ close PREFACE;
+
my $parser = new XML::LibXML;
my $xslt = new XML::LibXSLT;
+
+ my $source = $parser->parse_string($preface);
+ my $style_doc = $parser->parse_file('preface.xsl');
+
+ my $stylesheet = $xslt->parse_stylesheet($style_doc);
+ my $results = $stylesheet->transform($source);
+
+ print "I am still working on $_ \n";
+
+ print USFM "\\id $books{$_}";
+ print USFM $stylesheet->output_string($results);
+
+
my $source = $parser->parse_string($text);
- my $style_doc = $parser->parse_file('transform.xsl');
+ my $style_doc = $parser->parse_file('text.xsl');
my $stylesheet = $xslt->parse_stylesheet($style_doc);
my $results = $stylesheet->transform($source);
+
+ print "I am working on $_ \n";
+
+ # print USFM "\\id $books{$_}";
+ @lines = split( "\n", $stylesheet->output_string($results));
+
+ foreach (@lines) {
+
+ s/^\s*–\s*$//;
+ s/\\nd\s+E\s+\\nd*\s+-\\nd\s+NHOR\s+\\nd\*/\\nd SENHOR\\nd\*/g;
+ s/^\\v\ \ /\\p\n\\v\ /;
+ s/-\ //g;
+ s/(\\v\s+[0123456789]+)\(/$1\ (/;
- print USFM $stylesheet->output_string($results);
+ }
+ print USFM join("\n", @lines);
close USFM;
-
- $text="";
-
+
+
+ close PREFACEUSFM;
}
More information about the sword-cvs
mailing list