[sword-svn] r64 - trunk/versification/mapper

mgruner at crosswire.org mgruner at crosswire.org
Sat Jun 24 03:41:02 MST 2006


Author: mgruner
Date: 2006-06-24 03:40:53 -0700 (Sat, 24 Jun 2006)
New Revision: 64

Modified:
   trunk/versification/mapper/create_db.pl
Log:
further progess; create_db is almost complete
TODO: add convenience tables
TODO: add demonstration script how to use the db


Modified: trunk/versification/mapper/create_db.pl
===================================================================
--- trunk/versification/mapper/create_db.pl	2006-06-23 20:45:46 UTC (rev 63)
+++ trunk/versification/mapper/create_db.pl	2006-06-24 10:40:53 UTC (rev 64)
@@ -27,49 +27,113 @@
 	close(FH);
 }
 
-loadFromFile( "data/refsysmap.unl", 	\@mappings );
-loadFromFile( "data/Bible.xml", 			\@bible );
-loadFromFile( "data/Bible.NAB.xml", 	\@bible_nab );
-loadFromFile( "data/Bible.NJB.xml", 	\@bible_njb );
-loadFromFile( "data/Bible.ORG.xml", 	\@bible_org );
-loadFromFile( "data/Bible.Vul.xml", 	\@bible_vul );
-loadFromFile( "data/Bible.LXX.xml", 	\@bible_lxx );
-
 ##########################
-#INIT DB
+#INIT DBs
 ##########################
 unlink "v11n-mapper.db";
-my $dbh = DBI->connect("dbi:SQLite:dbname=v11n-mapper.db","","") || die "can't connect to SQLite database\n";
-$dbh->{unicode} = 1;
-$dbh->{AutoCommit} = 0;  # enable transactions
-$dbh->{RaiseError} = 1;
+my $dbh_mapper = DBI->connect("dbi:SQLite:dbname=v11n-mapper.db","","") || die "can't connect to SQLite database\n";
+$dbh_mapper->{unicode} = 1;
+$dbh_mapper->{AutoCommit} = 0;  # enable transactions
+$dbh_mapper->{RaiseError} = 0;
+unlink "v11n-schema.db";
+my $dbh_schema = DBI->connect("dbi:SQLite:dbname=v11n-schema.db","","") || die "can't connect to SQLite database\n";
+$dbh_schema->{unicode} = 1;
+$dbh_schema->{AutoCommit} = 0;  # enable transactions
+$dbh_schema->{RaiseError} = 1;
 
-sub feedScheme
+sub feedSchema
 {
 	my $dbh = shift;
 	my $table_name = shift;
 	my $array_ref = shift;
 
-	$dbh->do("CREATE TABLE $table_name (osisID TEXT NOT NULL)") || die $!;
-	#TODO: ADD INDEX!!
+	print "Feeding schema $table_name into DB.\n";
+
+	$dbh->do("CREATE TABLE $table_name (osisID TEXT NOT NULL UNIQUE)") || die $!;
 	
-	foreach my $osisID_line (grep(m/<osisID code/, @{$array_ref}))
+	my ($osisID, %osisIDs_in_DB);
+	my @lines = grep(m/<osisID code/, @{$array_ref});
+	die "no lines available!\n" unless @lines;
+
+	foreach my $osisID_line (@lines)
 	{
-		my ($osisID) = $osisID_line =~ m/code="(.+)"/;
-		$dbh->do("INSERT INTO $table_name VALUES (\'$osisID\')") || die $!;
+		($osisID) = $osisID_line =~ m/code="(.+)"/;
+		next if (exists($osisIDs_in_DB{$osisID})); #keep osisID column unique
+		$dbh->do("INSERT INTO $table_name VALUES (\'$osisID\')") || die "$!\n";
+		$osisIDs_in_DB{$osisID} = 1;
 	}
 	$dbh->commit();
 }
 
-&feedScheme($dbh, "scheme_bible", \@bible);
-&feedScheme($dbh, "scheme_bible_nab", \@bible_nab);
-&feedScheme($dbh, "scheme_bible_njb", \@bible_njb);
-&feedScheme($dbh, "scheme_bible_org", \@bible_org);
-&feedScheme($dbh, "scheme_bible_vul", \@bible_vul);
-&feedScheme($dbh, "scheme_bible_lxx", \@bible_lxx);
+sub feedMapping
+{
+	my $dbh = shift;
+	my $scheme1 = shift;
+	my $source = shift;
+	my $scheme2 = shift;
+	my $target = shift;
+	my $array_ref = shift;
 
-#print @{$dbh->selectcol_arrayref("SELECT osisID FROM scheme_bible")};
+	print "Feeding mapping \"$scheme1 to $scheme2\" into DB.\n";
 
-$dbh->disconnect();
+	$dbh->do("CREATE TABLE ".$source."_to_".$target." (source TEXT NOT NULL UNIQUE, target TEXT NOT NULL)") || die $!;
+	
+	my ($source_osisID, $target_osisID, %source_osisIDs_in_DB);
 
+	my @lines = grep(m/$scheme1:.+:$scheme2:.+/, @{$array_ref});
+	die "no lines available!\n" unless @lines;
+	
+	foreach my $mapping_line (@lines)
+	{
+		($source_osisID, $target_osisID) = $mapping_line =~ m/$scheme1:(.+):$scheme2:(.+)/;
+		next if ($source_osisID eq $target_osisID); #don't record something that does not need mapping
+		next if ( exists($source_osisIDs_in_DB{$source_osisID}) ); #keep source column unique
+
+		$dbh->do("INSERT INTO ".$source."_to_".$target." VALUES (\'$source_osisID\', \'$target_osisID\')") || die "$!\n";
+		$source_osisIDs_in_DB{$source_osisID} = 1;
+	}
+	$dbh->commit();
+}
+
+loadFromFile( "data/Bible.xml", 			\@bible );
+&feedSchema($dbh_schema, "bible", \@bible);
+ at bible=();
+
+loadFromFile( "data/Bible.NAB.xml", 	\@bible_nab );
+&feedSchema($dbh_schema, "bible_nab", \@bible_nab);
+ at bible_nab=();
+
+loadFromFile( "data/Bible.NJB.xml", 	\@bible_njb );
+&feedSchema($dbh_schema, "bible_njb", \@bible_njb);
+ at bible_njb=();
+
+loadFromFile( "data/Bible.ORG.xml", 	\@bible_org );
+&feedSchema($dbh_schema, "bible_org", \@bible_org);
+ at bible_org=();
+
+loadFromFile( "data/Bible.Vul.xml", 	\@bible_vul );
+&feedSchema($dbh_schema, "bible_vul", \@bible_vul);
+ at bible_vul=();
+
+loadFromFile( "data/Bible.LXX.xml", 	\@bible_lxx );
+&feedSchema($dbh_schema, "bible_lxx", \@bible_lxx);
+ at bible_lxx=();
+
+loadFromFile( "data/refsysmap.unl", 	\@mappings );
+foreach my $scheme1 ( qw(Bible Bible.NAB Bible.NJB Bible.ORG Bible.Vul Bible.LXX) )
+{
+	foreach my $scheme2 ( qw(Bible Bible.NAB Bible.NJB Bible.ORG Bible.Vul Bible.LXX) )
+	{
+		next if ($scheme1 eq $scheme2); #no mapping neccessary
+		next if (($scheme1 ne "Bible") && ($scheme2 ne "Bible")); #no data available
+		(my $source = $scheme1) =~ s/(.*)\.(.*)/$1_$2/;
+		(my $target = $scheme2) =~ s/(.*)\.(.*)/$1_$2/;
+
+		&feedMapping( $dbh_mapper, $scheme1, lc($source), $scheme2, lc($target), \@mappings );
+	}
+}
+
+$dbh_schema->disconnect();
+$dbh_mapper->disconnect();
+
 print "Done.\n"
\ No newline at end of file



More information about the sword-cvs mailing list