[Tynstep-svn] r108 - in trunk: . step-tools step-tools/.settings step-tools/src step-tools/src/main step-tools/src/main/java step-tools/src/main/resources
ChrisBurrell at crosswire.org
ChrisBurrell at crosswire.org
Wed Apr 14 14:28:21 MST 2010
Author: ChrisBurrell
Date: 2010-04-14 14:28:21 -0700 (Wed, 14 Apr 2010)
New Revision: 108
Added:
trunk/step-tools/
trunk/step-tools/.classpath
trunk/step-tools/.project
trunk/step-tools/.settings/
trunk/step-tools/.settings/org.eclipse.jdt.core.prefs
trunk/step-tools/.settings/org.maven.ide.eclipse.prefs
trunk/step-tools/pom.xml
trunk/step-tools/src/
trunk/step-tools/src/main/
trunk/step-tools/src/main/java/
trunk/step-tools/src/main/java/BibleFileGenerator.java
trunk/step-tools/src/main/java/BibleStatsAnalyser.java
trunk/step-tools/src/main/java/ScriptureReference.java
trunk/step-tools/src/main/resources/
trunk/step-tools/src/main/resources/log4j.properties
Log:
committing tools
Added: trunk/step-tools/.classpath
===================================================================
--- trunk/step-tools/.classpath (rev 0)
+++ trunk/step-tools/.classpath 2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" output="target/classes" path="src/main/java"/>
+ <classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/>
+ <classpathentry kind="con" path="org.maven.ide.eclipse.MAVEN2_CLASSPATH_CONTAINER"/>
+ <classpathentry kind="output" path="target/classes"/>
+</classpath>
Added: trunk/step-tools/.project
===================================================================
--- trunk/step-tools/.project (rev 0)
+++ trunk/step-tools/.project 2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>step-tools</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ <buildCommand>
+ <name>org.maven.ide.eclipse.maven2Builder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.maven.ide.eclipse.maven2Nature</nature>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ </natures>
+</projectDescription>
Added: trunk/step-tools/.settings/org.eclipse.jdt.core.prefs
===================================================================
--- trunk/step-tools/.settings/org.eclipse.jdt.core.prefs (rev 0)
+++ trunk/step-tools/.settings/org.eclipse.jdt.core.prefs 2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,6 @@
+#Mon Apr 05 13:57:25 BST 2010
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.5
Added: trunk/step-tools/.settings/org.maven.ide.eclipse.prefs
===================================================================
--- trunk/step-tools/.settings/org.maven.ide.eclipse.prefs (rev 0)
+++ trunk/step-tools/.settings/org.maven.ide.eclipse.prefs 2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,9 @@
+#Mon Apr 05 13:57:25 BST 2010
+activeProfiles=
+eclipse.preferences.version=1
+fullBuildGoals=process-test-resources
+includeModules=false
+resolveWorkspaceProjects=true
+resourceFilterGoals=process-resources resources\:testResources
+skipCompilerPlugin=true
+version=1
Added: trunk/step-tools/pom.xml
===================================================================
--- trunk/step-tools/pom.xml (rev 0)
+++ trunk/step-tools/pom.xml 2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,31 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <parent>
+ <groupId>com.tyndalehouse</groupId>
+ <artifactId>step-parent</artifactId>
+ <version>1.0-SNAPSHOT</version>
+ <relativePath>..</relativePath>
+ </parent>
+
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>com.tyndalehouse</groupId>
+ <artifactId>step-tools</artifactId>
+ <packaging>jar</packaging>
+ <name>step-tools</name>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.crosswire</groupId>
+ <artifactId>jsword</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ </dependency>
+ </dependencies>
+</project>
Added: trunk/step-tools/src/main/java/BibleFileGenerator.java
===================================================================
--- trunk/step-tools/src/main/java/BibleFileGenerator.java (rev 0)
+++ trunk/step-tools/src/main/java/BibleFileGenerator.java 2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,253 @@
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang.StringUtils;
+import org.crosswire.common.util.Language;
+import org.crosswire.jsword.book.Book;
+import org.crosswire.jsword.book.BookCategory;
+import org.crosswire.jsword.book.BookData;
+import org.crosswire.jsword.book.BookException;
+import org.crosswire.jsword.book.Books;
+import org.crosswire.jsword.book.OSISUtil;
+import org.crosswire.jsword.book.install.InstallException;
+import org.crosswire.jsword.book.install.sword.HttpSwordInstaller;
+import org.crosswire.jsword.passage.Key;
+import org.crosswire.jsword.passage.PassageKeyFactory;
+
+public class BibleFileGenerator extends Thread {
+ private final static String proxyHostProperty = "step.proxy.host";
+ private final static String proxyPortProperty = "step.proxy.port";
+ private String initials;
+
+
+ /**
+  * Creates a generator that is not bound to a Bible version; its initials
+  * are the empty string.
+  */
+ public BibleFileGenerator() {
+     this("");
+ }
+
+ /**
+  * Creates a generator for a single Bible version.
+  *
+  * @param initials initials of the version this instance renders when it is
+  *            run (for example "ESV"); also used as the prefix of its log
+  *            messages and as the name of its output file
+  */
+ public BibleFileGenerator(String initials) {
+     this.initials = initials;
+ }
+
+ private String writeBible(String version) throws Exception {
+
+ StringBuffer sb = new StringBuffer(String.format("@%s%s", version, System.getProperty("line.separator")));
+ // check information has been passed in
+ if (StringUtils.isEmpty(version)) {
+ throw new Exception("Version was not provided");
+ }
+
+ Book currentBook = Books.installed().getBook(version);
+
+ Key global = currentBook.getGlobalKeyList();
+ int numVersesInVersion = global.getCardinality();
+
+ String formatBook = "";
+ String formatChapter = "";
+ String formatVerse = "";
+
+ String previousBook = "none";
+ String previousChapter = "none";
+ String previousVerse = "none" ;
+
+ String key = null;
+ boolean newBook = true;
+ for (int ii = 0; ii < numVersesInVersion; ii++) {
+ try {
+ key = global.get(ii).getOsisID();
+
+ //assume just one verse?
+ String[] split = key.split("\\.");
+ formatBook = split[0];
+ formatChapter = split[1];
+ formatVerse = split[2];
+
+ if(!formatBook.equals(previousBook)) {
+ previousBook = formatBook;
+
+ sb.append('@');
+ sb.append(formatBook);
+ sb.append(System.getProperty("line.separator"));
+
+ //if new book, also reset chapter
+ previousChapter = formatChapter;
+ sb.append("@Chapter ");
+ sb.append(formatChapter);
+ sb.append(System.getProperty("line.separator"));
+ } else if(!formatChapter.equals(previousChapter)) {
+ previousChapter = formatChapter;
+ sb.append("@Chapter ");
+ sb.append(formatChapter);
+ sb.append(System.getProperty("line.separator"));
+ }
+
+ BookData data = new BookData(currentBook, global.get(ii));
+
+ sb.append(formatVerse);
+ sb.append(". ");
+ sb.append(OSISUtil.getCanonicalText(data.getOsis()).replace('\n', ' ').replace('\r', '\n'));
+ sb.append(System.getProperty("line.separator"));
+
+ //ScriptureReference sr = new ScriptureReference(key);
+
+// if (!sr.getBook().equals(formatBook)) {
+// sb.append(String.format("@%s%s", sr.getBook().toUpperCase(), System.getProperty("line.separator")));
+// formatBook = sr.getBook();
+// newBook = true;
+// }
+//
+// if (sr.getChapter() != null && !sr.getChapter().equals(formatChapter)) {
+// sb.append(String.format("@Chapter %s%s", sr.getChapter(), System.getProperty("line.separator")));
+// newBook = false;
+// formatChapter = sr.getChapter();
+// } else if(newBook) {
+// sb.append(String.format("@Chapter 1%s", System.getProperty("line.separator")));
+// newBook = false;
+// }
+
+ } catch (BookException e) {
+ System.out
+ .println(initials + ":: A book exception has occurred whilte looking up the passage: " + key);
+ e.printStackTrace();
+ throw new Exception(e);
+ } catch (Exception ex) {
+ System.err.println(initials + ":: Could not parse key: " + key);
+ ex.printStackTrace();
+ } finally {
+ ;
+ }
+ }
+ return sb.toString();
+
+ }
+
+ //TODO: here and elsewhere, ensure that the downloaders
+ //are not hardcoded - there are at least two more sites to get
+ //bible versions from.
+ private static HttpSwordInstaller getNewCustomInstaller() {
+ System.out.println("Creating new installer for JSword");
+ HttpSwordInstaller resourceInstaller = new HttpSwordInstaller();
+
+ System.out.println("Currently hardcoded installer host to:"
+ + "www.crosswire.org");
+ System.out.println("Currently hardcoded property names for step");
+ String host = "www.crosswire.org";
+ String proxyHost = System.getProperty(proxyHostProperty);
+ String proxyPort = System.getProperty(proxyPortProperty);
+ System.out.println(String.format("Setting to (%1$s via %2$s:%3$s)",
+ "www.crosswire.org", proxyHost, proxyPort));
+
+ resourceInstaller.setHost(host);
+ if (proxyHost != null) {
+ resourceInstaller.setProxyHost(proxyHost);
+ }
+ if (proxyPort != null) {
+ resourceInstaller.setProxyPort(Integer.parseInt(proxyPort));
+ }
+
+ System.out.println("Setting package and catalog directories");
+ resourceInstaller
+ .setPackageDirectory("/ftpmirror/pub/sword/packages/rawzip");
+ resourceInstaller.setCatalogDirectory("/ftpmirror/pub/sword/raw");
+ return resourceInstaller;
+ }
+
+ private void downloadAllBibles() throws InstallException {
+ HttpSwordInstaller installer = getNewCustomInstaller();
+
+ //TODO: ensure the comment in the comment of the function is in a warning
+ //somewhere...
+ installer.reloadBookList();
+
+ List availableBooks = installer.getBooks();
+
+ for(int ii = 0; ii < availableBooks.size(); ii++) {
+ Book b = (Book) availableBooks.get(ii);
+ Language en = new Language("en");
+
+ //check book is a biblical text...
+ if(b.getBookCategory() == BookCategory.BIBLE &&
+ b.getBookMetaData().getLanguage().equals(en)
+ ) {
+ String versionKey = b.getInitials();
+
+ System.out.println("Downloading " + ((Book) availableBooks.get(ii)).getName() + "...");
+ if(Books.installed().getBook(versionKey) == null) {
+ installer.install(installer.getBook(versionKey));
+ }
+ }
+ }
+ }
+
+
+ @Override
+ public void run() {
+ try {
+ System.out.println("Starting thread for " + initials);
+ FileUtils.writeStringToFile(new File("output/" + initials + ".txt"), writeBible(initials));
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (Exception e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+
+ }
+
+
+
+ // TODO: could profile the application to figure out if we can speed things
+ // up with memory settings etc, more/less threads...
+ // could also investigate rewriting it so that disk IO happens
+ // during processing to allow other threads to execute.
+ // would be interesting to profile to find out where bottlenecks are.
+ /**
+  * Lists the installed books and the books offered by the installer, then
+  * starts generating the text file for the ESV.
+  *
+  * @param args ignored
+  * @throws Exception kept from the original signature
+  */
+ public static void main(String args[]) throws Exception {
+     // TODO: kick off all the downloads (downloadAllBibles) and wait for them
+     // to complete for each version before continuing with the bible
+     // processing.
+
+     // of the installed books
+     printBooks(Books.installed().getBooks());
+
+     System.out.println("of available:\n\n");
+
+     printBooks(getNewCustomInstaller().getBooks());
+
+     // TODO: render every installed book of category BIBLE through a
+     // fixed-size thread pool instead of only the ESV.
+     new BibleFileGenerator("ESV").start();
+ }
+
+ /** Prints the initials and the name of each book, one book per line. */
+ private static void printBooks(List<Book> books) {
+     for (Book b : books) {
+         System.out.println(String.format("%s %s", b.getInitials(), b.getName()));
+     }
+ }
+}
Added: trunk/step-tools/src/main/java/BibleStatsAnalyser.java
===================================================================
--- trunk/step-tools/src/main/java/BibleStatsAnalyser.java (rev 0)
+++ trunk/step-tools/src/main/java/BibleStatsAnalyser.java 2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,34 @@
+import java.io.File;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Hashtable;
+
+import org.apache.commons.io.FileUtils;
+
+
+/**
+ * Counts how often each word occurs in the generated ESV text file.
+ */
+public class BibleStatsAnalyser {
+    public BibleStatsAnalyser() {
+        // nothing to initialise
+    }
+
+    /**
+     * Reads output/ESV.txt, splits it on whitespace and basic punctuation and
+     * prints one "word,count" line for every distinct word. The order of the
+     * lines is the iteration order of the underlying Hashtable.
+     *
+     * @param args ignored
+     * @throws IOException if output/ESV.txt cannot be read
+     */
+    public static void main(String args[]) throws IOException {
+        String esv = FileUtils.readFileToString(new File("output/ESV.txt"));
+        Hashtable<String, Integer> count = new Hashtable<String, Integer>();
+
+        // whitespace (including line breaks) and runs of delimiters separate
+        // words, so no token carries an embedded newline
+        for (String word : esv.split("[\\s,\\.!;\\']+")) {
+            if (word.length() == 0) {
+                // a leading delimiter yields one empty token; it is not a word
+                continue;
+            }
+            Integer seen = count.get(word);
+            count.put(word, seen == null ? 1 : seen + 1);
+        }
+
+        // report each distinct word once, not once per occurrence
+        for (String word : count.keySet()) {
+            System.out.println(String.format("%s,%d", word, count.get(word).intValue()));
+        }
+    }
+}
Added: trunk/step-tools/src/main/java/ScriptureReference.java
===================================================================
--- trunk/step-tools/src/main/java/ScriptureReference.java (rev 0)
+++ trunk/step-tools/src/main/java/ScriptureReference.java 2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,60 @@
+/**
+ * A textual scripture reference split into its book, chapter and verse parts.
+ */
+public class ScriptureReference {
+    private final String book;
+    private final String chapter;
+    private final String verse;
+
+    /**
+     * Splits a key such as "1 Samuel 2:13" or "Philemon 1" at its last space
+     * and its last colon.
+     * <p>
+     * With a colon the text after it is the verse and the text between the
+     * last space and the colon is the chapter. Without a colon the text after
+     * the last space is the verse and the chapter is null. The book is
+     * everything before the last space.
+     * <p>
+     * A key that cannot be split this way (no space at all, or a colon that
+     * precedes the last space) is reported on System.err; the parts that
+     * could not be determined stay null.
+     *
+     * @param key the reference to split
+     */
+    public ScriptureReference(String key) {
+        final int lastColon = key.lastIndexOf(":");
+        final int lastSpace = key.lastIndexOf(' ');
+        final boolean hasColon = lastColon != -1;
+
+        // the chapter lies between the last space and the last colon, which
+        // only works when the colon comes after the space
+        final boolean chapterRangeValid = !hasColon || lastSpace + 1 <= lastColon;
+        // the book needs a space to end at, and is skipped once the chapter failed
+        final boolean parsed = chapterRangeValid && lastSpace != -1;
+
+        verse = key.substring((hasColon ? lastColon : lastSpace) + 1);
+        chapter = (hasColon && chapterRangeValid) ? key.substring(lastSpace + 1, lastColon) : null;
+        book = parsed ? key.substring(0, lastSpace) : null;
+
+        if (!parsed) {
+            System.err.println("Key: " + key);
+            System.err.println("lastColon: " + lastColon);
+            System.err.println("lastSpace: " + lastSpace);
+            System.err.println("verse: " + verse);
+            System.err.println("chapter: " + chapter);
+        }
+    }
+
+    /**
+     * @return the book, or null when the key could not be split
+     */
+    public String getBook() {
+        return book;
+    }
+
+    /**
+     * @return the chapter, or null when the key has none
+     */
+    public String getChapter() {
+        return chapter;
+    }
+
+    /**
+     * @return the verse
+     */
+    public String getVerse() {
+        return verse;
+    }
+}
Added: trunk/step-tools/src/main/resources/log4j.properties
===================================================================
--- trunk/step-tools/src/main/resources/log4j.properties (rev 0)
+++ trunk/step-tools/src/main/resources/log4j.properties 2010-04-14 21:28:21 UTC (rev 108)
@@ -0,0 +1,9 @@
+# Set root logger level to WARN and its only appender to A1.
+log4j.rootLogger=WARN, A1
+
+# A1 is set to be a ConsoleAppender.
+log4j.appender.A1=org.apache.log4j.ConsoleAppender
+
+# A1 uses PatternLayout.
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout
+log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
More information about the Tynstep-svn
mailing list