[sword-svn] r303 - trunk/modules/portuguese
refdoc at crosswire.org
refdoc at crosswire.org
Tue Sep 14 16:16:34 MST 2010
Author: refdoc
Date: 2010-09-14 16:16:34 -0700 (Tue, 14 Sep 2010)
New Revision: 303
Modified:
trunk/modules/portuguese/transform.pl
Log:
Updated; now produces USFM for the text part.
Modified: trunk/modules/portuguese/transform.pl
===================================================================
--- trunk/modules/portuguese/transform.pl 2010-09-09 22:47:43 UTC (rev 302)
+++ trunk/modules/portuguese/transform.pl 2010-09-14 23:16:34 UTC (rev 303)
@@ -1,25 +1,48 @@
#!/usr/bin/perl
+use XML::LibXSLT;
+use XML::LibXML;
my @files=`ls -1 *.xml`;
-# my @ident=`cat books`;
foreach (@files){
my @lines;
my @text;
my $tag;
- # my %vs;
- # my %kw;
+
chop;
- open SIMPLE, ">>$_.simple.xml";
+ open TEXT, ">>$_.text.xml";
+ open USFM, ">>$_.text.sfm";
+ open PREFACE, ">>$_.preface.xml";
+
chomp(@lines=`cat $_`);
- # @lines[1]="\\id @ident[$_-1] ";
+
foreach (@lines) {
+ s/(size=\"20\"\ face=\".*?\-Bold\"\ color=\"\#6D6E70\")/class=\"chapter\"\ $1/g;
+ }
+
+SPLIT: foreach (@lines) {
+
+ if (/chapter/) {
+ print (PREFACE "</page></pdf2xml>");
+ $text='<?xml version="1.0" encoding="utf-8" ?><pdf2xml><page>';
+ last SPLIT;
+ }
+ else {
+ s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"intro_para\"\ $1/g;
+ print (PREFACE $_."\n");
+ $_="";
+ }
+ }
+
+
+ foreach (@lines) {
+
s/(size=\"17\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"bookname\"\ \ $1/g;
s/(size=\"4\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"verse_no\"\ \ $1/g;
s/(size=\"8\"\ face=\".*?\-Roman\"\ color=\"\#231F20\")/class=\"maintext\"\ \ $1/g;
@@ -33,11 +56,12 @@
# s/(size=\"8\"\ face=\".*?\-Bold\"\ color=\"\#EC008C\")/class=\"Intro_title\"\ $1/g;
s/<text.*?>/<text>\ $1/g;
s/(size=\"7\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"ref_key\"\ $1/g;
- s/(size=\"20\"\ face=\".*?\-Bold\"\ color=\"\#6D6E70\")/class=\"chapter\"\ $1/g;
- s/(size=\"8\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"intro_para\"\ $1/g;
s/(size=\"6\"\ face=\".*?\-Italic\"\ color=\"\#231F20\")/class=\"bookname2\"\ $1/g;
s/$/\n/;
- }
+ }
+
+
+
foreach (@lines) {
@@ -51,7 +75,7 @@
}
- $text = join ("", @lines);
+ $text = $text.join ("", @lines);
$text =~ s/\n\s*<text>//g;
@@ -60,16 +84,27 @@
$text =~ s/(S)<\/maintext>\n\s*<Lords_Name>\s*(ENHOR)<\/Lords_Name>\n\s*<maintext>/<Lords_Name>$1$2<\/Lords_Name>/g;
$text =~ s/(<verse_no>.*?<\/verse_no>)\n\s*(<maintext>.*?<\/maintext>)\n\s*?:(<verse_no>)/<verse>$1$2<\/verse>\n<verse_no>/g;
- # @lines = split(/\n/,$text);
- # foreach (@lines) {
-
-
+ # create an instance of XSL::XSLT processor
+ print TEXT $text;
+ close text;
+
+ my $parser = new XML::LibXML;
+ my $xslt = new XML::LibXSLT;
+
+ my $source = $parser->parse_string($text);
+ my $style_doc = $parser->parse_file('transform.xsl');
+ my $stylesheet = $xslt->parse_stylesheet($style_doc);
+ my $results = $stylesheet->transform($source);
+
+ print USFM $stylesheet->output_string($results);
+
+ close USFM;
+
+ $text="";
+
- print (SIMPLE $text);
- close SIMPLE;
-
}
More information about the sword-cvs
mailing list