[sword-svn] r305 - trunk/modules/portuguese

refdoc at crosswire.org refdoc at crosswire.org
Thu Sep 23 18:18:39 MST 2010


Author: refdoc
Date: 2010-09-23 18:18:39 -0700 (Thu, 23 Sep 2010)
New Revision: 305

Modified:
   trunk/modules/portuguese/preface.xsl
   trunk/modules/portuguese/text.xsl
   trunk/modules/portuguese/transform.pl
Log:
more improvements to reading the xml files. Better USFM


Modified: trunk/modules/portuguese/preface.xsl
===================================================================
--- trunk/modules/portuguese/preface.xsl	2010-09-18 01:39:08 UTC (rev 304)
+++ trunk/modules/portuguese/preface.xsl	2010-09-24 01:18:39 UTC (rev 305)
@@ -8,7 +8,7 @@
 <xsl:template match="page"><xsl:apply-templates/></xsl:template>
 <xsl:template match="Intro_footer"><xsl:apply-templates/></xsl:template>
 
-<xsl:template match="intro_para">
+<xsl:template match="Intro_para">
 \ip <xsl:value-of select="."/></xsl:template>
 
 <xsl:template match="Intro_title">
@@ -22,9 +22,11 @@
 \p
 </xsl:template>
 
+<xsl:template match="bookname2">\hi<xsl:value-of select="."/>\hi*</xsl:template>
+
 <xsl:template match="Lords_Name">\nd <xsl:value-of select="."/> \nd*</xsl:template>
+<xsl:template match="title"/>
 
-
 <xsl:template match="img">\fig <xsl:value-of select="@src"/>|<xsl:value-of select="@alt"/> \fig*</xsl:template>
 
 <xsl:strip-space elements="*"/>

Modified: trunk/modules/portuguese/text.xsl
===================================================================
--- trunk/modules/portuguese/text.xsl	2010-09-18 01:39:08 UTC (rev 304)
+++ trunk/modules/portuguese/text.xsl	2010-09-24 01:18:39 UTC (rev 305)
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <xsl:stylesheet version="1.0" 
     xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
- <xsl:output method="text"/>
+<xsl:output method="text"/>
  
 <xsl:template match="/"><xsl:apply-templates/></xsl:template>
 <xsl:template match="page"><xsl:apply-templates/></xsl:template>
@@ -31,14 +31,14 @@
 
 <xsl:template match="Lords_Name">\nd <xsl:value-of select="."/> \nd*</xsl:template>
 
-<xsl:template match="ref_text"></xsl:template>
-<xsl:template match="ref_key"></xsl:template>
-<xsl:template match="ref_no"></xsl:template>
-<xsl:template match="refverse_no"></xsl:template>
-<xsl:template match="refchapter_no"></xsl:template>
-<xsl:template match="font"></xsl:template>
-<xsl:template match="page_footer"></xsl:template>
-<xsl:template match="title"></xsl:template>
+<xsl:template match="ref_text"/>
+<xsl:template match="ref_key"/>
+<xsl:template match="ref_no"/>
+<xsl:template match="refverse_no"/>
+<xsl:template match="refchapter_no"/>
+<xsl:template match="font"/>
+<xsl:template match="page_footer"/>
+<xsl:template match="title"/>
 
 
 
@@ -46,6 +46,4 @@
 <xsl:strip-space elements="*"/>
 
 
-</xsl:stylesheet>
-
-
+</xsl:stylesheet>
\ No newline at end of file

Modified: trunk/modules/portuguese/transform.pl
===================================================================
--- trunk/modules/portuguese/transform.pl	2010-09-18 01:39:08 UTC (rev 304)
+++ trunk/modules/portuguese/transform.pl	2010-09-24 01:18:39 UTC (rev 305)
@@ -2,20 +2,102 @@
 
 use XML::LibXSLT;
 use XML::LibXML;
+use utf8;
 
+
+
 my @files=`ls -1 *.xml`;
 
+my %books = qw( 
+01GEN.xml	GEN
+02EXO.xml	EXO
+03LEV.xml	LEV
+04NUM.xml	NUM
+05DEU.xml	DEU
+06JOS.xml	JOS
+07JUDG.xml	JDG
+08RUT.xml	RUT
+091SAM.xml	1SA
+102SAM.xml	2SA
+111KGS.xml	1KI
+122KGS.xml	2KI
+131CHR.xml	1CH
+142CHR.xml	2CH
+15ESRA.xml	EZR
+16NEH.xml	NEH
+17TOB.xml	TOB
+18JUDIT.xml	JDT
+19EST.xml	EST
+201MAK.xml	1MA
+212MAK.xml	2MA
+22JOB.xml	JOB
+23PSA.xml	PSA
+24PRO.xml	PRO
+25ECL.xml	ECC
+26SONG.xml	SNG
+27WIS.xml	WIS
+28SIR.xml	SIR
+29ISA.xml	ISA
+30JER.xml	JER
+31LAM.xml	LAM
+32BAR.xml	BAR
+33EZE.xml	EZK
+34DAN.xml	DAN
+35HOS.xml	HOS
+36JOEL.xml	JOL
+37AMOS.xml	AMO
+38OBA.xml	OBA
+39JONAS.xml	JON
+40MIC.xml	MIC
+41NAH.xml	NAM
+42HAB.xml	HAB
+43ZEPH.xml	ZEP
+44HAG.xml	HAG
+45HAB.xml	ZEC
+46MAL.xml	MAL
+47MAT.xml	MAT
+48MRK.xml	MRK
+49LUK.xml	LUK
+50JHN.xml	JHN
+51ACTS.xml	ACT
+52ROM.xml	ROM
+531COR.xml	1CO
+542COR.xml	2CO
+55GAL.xml	GAL
+56EPH.xml	EPH
+57PHIL.xml	PHP
+58COL.xml	COL
+591THES.xml	1TH
+602THES.xml	2TH
+611TIM.xml	1TI
+622TIM.xml	2TI
+63TIT.xml	TIT
+64PHLM.xml	PHM
+65HEB.xml	HEB
+66JAM.xml	JAS
+671PET.xml	1PE
+682PET.xml	2PE
+691JHN.xml	1JN
+702JHN.xml	2JN
+713JHN.xml	3JN
+72JUDE.xml	JUD
+73REV.xml	REV
+);
+
+
 foreach (@files){ 
 
         my @lines;
-        my @text;
+        my $text;
         my $tag;
- 
+        my $preface;
+        my @preface;
         
 	chop;
 	open TEXT, ">>$_.text.xml";
 	open USFM, ">>$_.text.sfm";
 	open PREFACE, ">>$_.preface.xml";
+	open PREFACEUSFM, ">>$_.preface.sfm";
 	
 	chomp(@lines=`cat $_`);
 
@@ -24,18 +106,39 @@
  	foreach (@lines) {
  		
 		s/(size=\"20\"\ face=\".*?\-Bold\"\ color=\"\#6D6E70\")/class=\"chapter\"\ $1/g;
+		s/(size=\"20\"\ face=\".*?\-BoldItalic\"\ color=\"\#EC008C\")/class=\"chapter\"\ $1/g;  # Deuterocanonical Chapters in Esther and Daniel
+		s/(size=\"19\"\ face=\".*\-Bold\"\ color=\"\#6D6E70\")/class=\"chapter\" $1/g; # Psalsm
 	}
-
+	
 SPLIT:   foreach (@lines) {
                 
                 if (/chapter/) {
-                   print (PREFACE "</page></pdf2xml>");
-                   $text='<?xml version="1.0" encoding="utf-8" ?><pdf2xml><page>';
+                   push (@preface, "</page></pdf2xml>");
+                   $text='<?xml version="1.0"?><pdf2xml><page>';
                    last SPLIT;
                    }
                 else {
-                   s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"intro_para\"\ $1/g;   
-                   print (PREFACE $_."\n");
+    		   s/(size=\"6\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"bookname2\"\ $1/g;
+                   s/(size=\"5\"\ face=\".*?Helvetica\-Bold\"\ color=\"\#231F20\")/class=\"Lords_Name\"\ $1/g;
+                   s/(size=\"5\"\ face=\".*?Helvetica\"\ color=\"\#231F20\")/class=\"Lords_Name\"\ $1/g;
+                   s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"Intro_title\"\ $1/g;
+                   s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"Intro_para\"\ $1/g;   
+                   s/(size=\"8\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"Intro_para\"\ $1/g;   
+                   s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ $1/g;
+                   s/(size=\"14\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ $1/g;
+    		   s/<text.*?>/<text>\ $1/g;
+		   s/(size=\"8\"\ face=\".*?\-BoldItalic\" color=\"\#231F20\")/class=\"Intro_title_2\"\ $1/g;
+		   s/(size=\"7\"\ face=\".*?\-BoldItalic\"\ color=\"\#231F20\")/class=\"Intro_title_ref"\ $1/g;
+		   s/(size=\"7\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"reference\"\ $1/g;
+		   s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#231F20\")/class=\"Intro_outline\" $1/g;
+		   s/(size=\"7\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"Verse_Range\" $1/g;
+		   s/(size=\"7\"\ face=\".*?\+Helvetica\"\ color=\"\#231F20\")/class=\"Intro_footer\" $1/g;
+		   s/(size=\"7\"\ face=\".*?\+Helvetica-Bold\"\ color=\"\#231F20\")/class=\"Image_title\" $1/g;
+		   s/(size=\"43\"\ face=\".*?\-Italic\"\ color=\"\#6D6E70\")/class=\"Intro_initial\"$1/g;
+		   s/(size=\"11\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"Intro_title\" $1/g;
+		   s/(size=\"6\"\ face=\".*?\-Oblique\"\ color=\"\#231F20\")/class=\"Image_ref\" $1/g;
+		   s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ \ $1/g;
+                   push( @preface, $_."\n");
                    $_="";
                 }
         }
@@ -46,6 +149,7 @@
  		s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ \ $1/g;
 		s/(size=\"4\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"verse_no\"\ \ $1/g;
 		s/(size=\"8\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"maintext\"\ \ $1/g;
+		s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"maintext\"\ \ $1/g;
 		s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"section_title\"\ $1/g;
 		s/(size=\"7\"\ face=\".*?\-Bold\"\ color=\"\#231F20\")/class=\"refverse_no\"\ $1/g;
 		s/(size=\"9\"\ face=\".*?\-Bold\"\ color=\"\#231F20\">)/class=\"refchapter_no\"\ $1/g;
@@ -53,7 +157,6 @@
 		s/(size=\"7\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"ref_text\"\ $1/g;
 		s/(size=\"7\"\ face\=\".*?\+Helvetica\"\ color=\"\#231F20\")/class=\"page_footer\"\ $1/g;
 		s/(size=\"6\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"Lords_Name\"\ $1/g;
-		# s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"Intro_title\"\ $1/g;
 		s/<text.*?>/<text>\ $1/g;
 		s/(size=\"7\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"ref_key\"\ $1/g;
 		s/(size=\"6\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"bookname2\"\ $1/g;
@@ -84,25 +187,88 @@
 	$text =~ s/(S)<\/maintext>\n\s*<Lords_Name>\s*(ENHOR)<\/Lords_Name>\n\s*<maintext>/<Lords_Name>$1$2<\/Lords_Name>/g;
 	$text =~ s/(<verse_no>.*?<\/verse_no>)\n\s*(<maintext>.*?<\/maintext>)\n\s*?:(<verse_no>)/<verse>$1$2<\/verse>\n<verse_no>/g;
 	
+	
+	
+	foreach (@preface) {
+	
+	
+		 
+		 s/<font\ class=\"(.*?)\".*?>/<$1>/;
+		 $tag = $1;
+		 s/<\/font/"<\/".$tag/e;
+		 s/<\/>/<\/font>/;	 
+	}	 
+	
+	$preface = join ("", @preface);
+	
+	
+	$preface =~ s/\s+/\ /g;
+	$preface =~ s/<(|\/)text>//g;
+	$preface =~ s/(.)<\/Intro_para>\s*<bookname2>(.*?)<\/bookname2>/<bookname2>$1$2<\/bookname2><\/Intro_para>/g;
+	$preface =~ s/(S|D)\s*<\/Intro_para>\s*\n*\s*<Lords_Name>\s*(ENHOR|EUS)<\/Lords_Name>\s*\n*\s*<Intro_para>/<Lords_Name>$1$2<\/Lords_Name>/g;
+	# $preface =~ s/(D)<\/Intro_para>\s*\n*\s*<Lords_Name>\s*(EUS)<\/Lords_Name>\s*\n*\s*<Intro_para>/<Lords_Name>$1$2<\/Lords_Name>/g;
+	$preface =~ s/(S|D)\s*<Lords_Name>\s*(ENHOR|EUS)<\/Lords_Name>/<Lords_Name>$1$2<\/Lords_Name>/g;
+	# $preface =~ s/(D)\s*<Lords_Name>\s*(EUS)<\/Lords_Name>/<Lords_Name>$1$2<\/Lords_Name>/g;
+	$preface =~ s/<\/Intro_para>\s*\n*\s*<Intro_para>//g;
+	$preface =~ s/<\/Intro_title_2>\s*<Intro_para>(.*?)<\/Intro_para>/\ $1<\/Intro_title_2>/g;
+	$preface =~ s/<\/Intro_outline>\s*<Intro_title_2>\s*(.*?)<\/Intro_title_2>/$1<\/Intro_outline>/g;
+	$preface =~ s/\/>\s*<\/Intro_footer>\s*<Image_title>(.*?)<\/Image_title>\s*?<Intro_footer>(.*?)<\/Intro_footer>/\ alt=\"$1$2\"\/><\/Intro_footer>/g;
+	# $preface =~ s/png\"\/>\s*<\/Intro_para>\s*<Image_title>(.*?)<\/Image_title>\s*?<Intro_footer>(.*?)<\/Intro_footer>\s*?<Image_ref>(.*?)<\/Image_ref>/png\"\ alt=\"$1$2\"\ ref=\"$3\"\/><\/Intro_para>/g;
+	$preface =~ s/<Intro_initial>(.*?)<\/Intro_initial>\s<Intro_para>/<Intro_para>$1\ /g;
+        $preface =~ s/\s+/\ /g;
+	$preface =~ s/-\ //g;
+	$preface =~ s/<Intro/\n<Intro/g;
+            
+	
+	
 	# create an instance of XSL::XSLT processor
         print TEXT $text;
-        close text;
-    
+        close TEXT;
+        print PREFACE $preface;
+        close PREFACE;
+        
         my $parser = new XML::LibXML;
         my $xslt   = new XML::LibXSLT;
           
+        
+        my $source     = $parser->parse_string($preface);
+        my $style_doc  = $parser->parse_file('preface.xsl');
+
+        my $stylesheet = $xslt->parse_stylesheet($style_doc);
+        my $results    = $stylesheet->transform($source);
+        
+        print "I am still  working on $_ \n";
+        
+        print USFM "\\id $books{$_}";            
+        print USFM $stylesheet->output_string($results);
+        
+        
         my $source     = $parser->parse_string($text);
-        my $style_doc  = $parser->parse_file('transform.xsl');
+        my $style_doc  = $parser->parse_file('text.xsl');
 
         my $stylesheet = $xslt->parse_stylesheet($style_doc);
         my $results    = $stylesheet->transform($source);
+        
+        print "I am working on $_ \n";
+        
+        # print USFM "\\id $books{$_}";            
+        @lines = split( "\n", $stylesheet->output_string($results));
+        
+        foreach (@lines) {
+            
+            s/^\s*–\s*$//;
+            s/\\nd\s+E\s+\\nd*\s+-\\nd\s+NHOR\s+\\nd\*/\\nd SENHOR\\nd\*/g;
+            s/^\\v\ \ /\\p\n\\v\ /;
+            s/-\ //g;
+            s/(\\v\s+[0123456789]+)\(/$1\ (/;
                     
-        print USFM $stylesheet->output_string($results);
+        }
         
+        print USFM join("\n", @lines);
         close USFM;
-       
-        $text="";
-       
+              
+        
+        close PREFACEUSFM;
 	
 	
 }




More information about the sword-cvs mailing list