[Tynstep-svn] r26 - in trunk/StepTools: . lib src
ChrisBurrell at crosswire.org
ChrisBurrell at crosswire.org
Tue Nov 24 15:52:26 MST 2009
Author: ChrisBurrell
Date: 2009-11-24 15:52:26 -0700 (Tue, 24 Nov 2009)
New Revision: 26
Added:
trunk/StepTools/lib/
trunk/StepTools/lib/aopalliance.jar
trunk/StepTools/lib/commons-codec-1.3.jar
trunk/StepTools/lib/commons-httpclient-3.1.jar
trunk/StepTools/lib/commons-io-1.4.jar
trunk/StepTools/lib/commons-lang-2.4.jar
trunk/StepTools/lib/commons-logging-1.1.1.jar
trunk/StepTools/lib/javatar-2.5.jar
trunk/StepTools/lib/jdom-1.0.jar
trunk/StepTools/lib/jsword-1.6.jar
trunk/StepTools/lib/jsword-common-1.6.jar
trunk/StepTools/lib/log4j.jar
trunk/StepTools/src/
trunk/StepTools/src/BibleFileGenerator.java
trunk/StepTools/src/BibleStatsAnalyser.java
trunk/StepTools/src/ScriptureReference.java
trunk/StepTools/src/log4j.properties
Log:
Added: trunk/StepTools/lib/aopalliance.jar
===================================================================
(Binary files differ)
Property changes on: trunk/StepTools/lib/aopalliance.jar
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: trunk/StepTools/lib/commons-codec-1.3.jar
===================================================================
(Binary files differ)
Property changes on: trunk/StepTools/lib/commons-codec-1.3.jar
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: trunk/StepTools/lib/commons-httpclient-3.1.jar
===================================================================
(Binary files differ)
Property changes on: trunk/StepTools/lib/commons-httpclient-3.1.jar
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: trunk/StepTools/lib/commons-io-1.4.jar
===================================================================
(Binary files differ)
Property changes on: trunk/StepTools/lib/commons-io-1.4.jar
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: trunk/StepTools/lib/commons-lang-2.4.jar
===================================================================
(Binary files differ)
Property changes on: trunk/StepTools/lib/commons-lang-2.4.jar
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: trunk/StepTools/lib/commons-logging-1.1.1.jar
===================================================================
(Binary files differ)
Property changes on: trunk/StepTools/lib/commons-logging-1.1.1.jar
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: trunk/StepTools/lib/javatar-2.5.jar
===================================================================
(Binary files differ)
Property changes on: trunk/StepTools/lib/javatar-2.5.jar
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: trunk/StepTools/lib/jdom-1.0.jar
===================================================================
(Binary files differ)
Property changes on: trunk/StepTools/lib/jdom-1.0.jar
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: trunk/StepTools/lib/jsword-1.6.jar
===================================================================
(Binary files differ)
Property changes on: trunk/StepTools/lib/jsword-1.6.jar
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: trunk/StepTools/lib/jsword-common-1.6.jar
===================================================================
(Binary files differ)
Property changes on: trunk/StepTools/lib/jsword-common-1.6.jar
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: trunk/StepTools/lib/log4j.jar
===================================================================
(Binary files differ)
Property changes on: trunk/StepTools/lib/log4j.jar
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: trunk/StepTools/src/BibleFileGenerator.java
===================================================================
--- trunk/StepTools/src/BibleFileGenerator.java (rev 0)
+++ trunk/StepTools/src/BibleFileGenerator.java 2009-11-24 22:52:26 UTC (rev 26)
@@ -0,0 +1,228 @@
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang.StringUtils;
+import org.crosswire.common.util.Language;
+import org.crosswire.jsword.book.Book;
+import org.crosswire.jsword.book.BookCategory;
+import org.crosswire.jsword.book.BookData;
+import org.crosswire.jsword.book.BookException;
+import org.crosswire.jsword.book.Books;
+import org.crosswire.jsword.book.OSISUtil;
+import org.crosswire.jsword.book.install.InstallException;
+import org.crosswire.jsword.book.install.sword.HttpSwordInstaller;
+import org.crosswire.jsword.passage.Key;
+
+/**
+ * Dumps an installed JSword Bible to a text file in the output directory,
+ * one verse per line, with "@Book" and "@Chapter n" marker lines.
+ */
+public class BibleFileGenerator extends Thread {
+    private static final String PROXY_HOST_PROPERTY = "step.proxy.host";
+    private static final String PROXY_PORT_PROPERTY = "step.proxy.port";
+    private static final String INSTALLER_HOST = "www.crosswire.org";
+    private static final String DEFAULT_VERSION = "ESV";
+    private static final String LINE_SEPARATOR = System.getProperty("line.separator");
+
+    private final String initials;
+
+    public BibleFileGenerator() {
+        this("");
+    }
+
+    /**
+     * @param initials initials of the installed version to dump, e.g. "ESV"
+     */
+    public BibleFileGenerator(String initials) {
+        this.initials = initials;
+    }
+
+    /**
+     * Renders every verse of an installed version as text.
+     *
+     * @param version initials of the installed version
+     * @return the whole text, starting with an "@version" line
+     * @throws Exception if no version was given, the version is not installed,
+     *             or JSword reports a BookException while reading a verse
+     */
+    private String writeBible(String version) throws Exception {
+        // validate before the version is used for anything
+        if (StringUtils.isEmpty(version)) {
+            throw new Exception("Version was not provided");
+        }
+
+        Book currentBook = Books.installed().getBook(version);
+        if (currentBook == null) {
+            throw new Exception("Version is not installed: " + version);
+        }
+
+        StringBuilder sb = new StringBuilder();
+        sb.append('@').append(version).append(LINE_SEPARATOR);
+
+        Key global = currentBook.getGlobalKeyList();
+        int numVersesInVersion = global.getCardinality();
+
+        String previousBook = "none";
+        String previousChapter = "none";
+
+        for (int ii = 0; ii < numVersesInVersion; ii++) {
+            String key = null;
+            try {
+                Key verseKey = global.get(ii);
+                key = verseKey.getOsisID();
+
+                // a single verse is expected to look like Book.Chapter.Verse
+                String[] split = key.split("\\.");
+                if (split.length < 3) {
+                    System.err.println(initials + ":: Could not parse key: " + key);
+                    continue;
+                }
+                String formatBook = split[0];
+                String formatChapter = split[1];
+                String formatVerse = split[2];
+
+                boolean newBook = !formatBook.equals(previousBook);
+                if (newBook) {
+                    previousBook = formatBook;
+                    sb.append('@').append(formatBook).append(LINE_SEPARATOR);
+                }
+
+                // a new book always opens a new chapter, even if the number repeats
+                if (newBook || !formatChapter.equals(previousChapter)) {
+                    previousChapter = formatChapter;
+                    sb.append("@Chapter ").append(formatChapter).append(LINE_SEPARATOR);
+                }
+
+                BookData data = new BookData(currentBook, verseKey);
+                String text = OSISUtil.getCanonicalText(data.getOsis());
+
+                // keep each verse on one line: flatten both kinds of line break
+                sb.append(formatVerse).append(". ");
+                sb.append(text.replace('\n', ' ').replace('\r', ' '));
+                sb.append(LINE_SEPARATOR);
+            } catch (BookException e) {
+                System.out
+                        .println(initials + ":: A book exception has occurred whilte looking up the passage: " + key);
+                e.printStackTrace();
+                throw new Exception(e);
+            } catch (Exception ex) {
+                // best effort: report the verse that failed and carry on
+                System.err.println(initials + ":: Could not parse key: " + key);
+                ex.printStackTrace();
+            }
+        }
+        return sb.toString();
+    }
+
+    // TODO: here and elsewhere, ensure that the downloaders
+    // are not hardcoded - there are at least two more sites to get
+    // bible versions from.
+    private static HttpSwordInstaller getNewCustomInstaller() {
+        System.out.println("Creating new installer for JSword");
+        HttpSwordInstaller resourceInstaller = new HttpSwordInstaller();
+
+        System.out.println("Currently hardcoded installer host to:" + INSTALLER_HOST);
+        System.out.println("Currently hardcoded property names for step");
+        String proxyHost = System.getProperty(PROXY_HOST_PROPERTY);
+        String proxyPort = System.getProperty(PROXY_PORT_PROPERTY);
+        System.out.println(String.format("Setting to (%1$s via %2$s:%3$s)",
+                INSTALLER_HOST, proxyHost, proxyPort));
+
+        resourceInstaller.setHost(INSTALLER_HOST);
+        if (proxyHost != null) {
+            resourceInstaller.setProxyHost(proxyHost);
+        }
+        if (proxyPort != null) {
+            // tolerate stray whitespace around the number in the property
+            resourceInstaller.setProxyPort(Integer.parseInt(proxyPort.trim()));
+        }
+
+        System.out.println("Setting package and catalog directories");
+        resourceInstaller.setPackageDirectory("/ftpmirror/pub/sword/packages/rawzip");
+        resourceInstaller.setCatalogDirectory("/ftpmirror/pub/sword/raw");
+        return resourceInstaller;
+    }
+
+    /**
+     * Installs every English Bible offered by the installer that is not
+     * installed locally yet.
+     *
+     * @throws InstallException if the installer reports a failure
+     */
+    private void downloadAllBibles() throws InstallException {
+        HttpSwordInstaller installer = getNewCustomInstaller();
+
+        // TODO: make sure the caveat documented on reloadBookList() ends up
+        // in a warning somewhere...
+        installer.reloadBookList();
+
+        Language en = new Language("en");
+        List<?> availableBooks = installer.getBooks();
+        for (Object candidate : availableBooks) {
+            Book b = (Book) candidate;
+
+            // only biblical texts in English are of interest
+            if (b.getBookCategory() != BookCategory.BIBLE
+                    || !b.getBookMetaData().getLanguage().equals(en)) {
+                continue;
+            }
+
+            // announce a download only when one actually happens
+            String versionKey = b.getInitials();
+            if (Books.installed().getBook(versionKey) == null) {
+                System.out.println("Downloading " + b.getName() + "...");
+                installer.install(installer.getBook(versionKey));
+            }
+        }
+    }
+
+    /**
+     * Writes the version named by the initials to output/ as a .txt file.
+     * Failures are reported on the error stream instead of being propagated.
+     */
+    @Override
+    public void run() {
+        System.out.println("Starting thread for " + initials);
+        try {
+            FileUtils.writeStringToFile(new File("output/" + initials + ".txt"), writeBible(initials));
+        } catch (IOException e) {
+            System.err.println(initials + ":: Could not write the output file");
+            e.printStackTrace();
+        } catch (Exception e) {
+            System.err.println(initials + ":: Could not generate the text");
+            e.printStackTrace();
+        }
+    }
+
+    /** Prints the initials and name of every book in the list. */
+    private static void printBooks(List<?> books) {
+        for (Object entry : books) {
+            Book b = (Book) entry;
+            System.out.println(String.format("%s %s", b.getInitials(), b.getName()));
+        }
+    }
+
+    // TODO: could profile the application to figure out if we can speed things
+    // up with memory settings etc, more/less threads...
+    // could also investigate rewriting it so that disk IO happens
+    // during processing to allow other threads to execute.
+    // TODO: download the missing versions first (downloadAllBibles) and run
+    // every installed Bible through a small thread pool, not just one version.
+    /**
+     * Lists the installed and the available books, then dumps one version.
+     *
+     * @param args optional; args[0] is the version to dump, default "ESV"
+     */
+    public static void main(String args[]) throws Exception {
+        // of the installed books
+        printBooks(Books.installed().getBooks());
+
+        System.out.println("of available:\n\n");
+        printBooks(getNewCustomInstaller().getBooks());
+
+        String version = (args != null && args.length > 0) ? args[0] : DEFAULT_VERSION;
+        new BibleFileGenerator(version).start();
+    }
+}
Added: trunk/StepTools/src/BibleStatsAnalyser.java
===================================================================
--- trunk/StepTools/src/BibleStatsAnalyser.java (rev 0)
+++ trunk/StepTools/src/BibleStatsAnalyser.java 2009-11-24 22:52:26 UTC (rev 26)
@@ -0,0 +1,54 @@
+import java.io.File;
+import java.io.IOException;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.commons.io.FileUtils;
+
+/**
+ * Prints a word-frequency table for the generated ESV text file.
+ */
+public class BibleStatsAnalyser {
+    /** One or more whitespace characters or separator punctuation marks. */
+    private static final String WORD_SEPARATORS = "[\\s,.!;']+";
+
+    public BibleStatsAnalyser() {
+    }
+
+    /**
+     * Counts how often each word occurs in the given text.
+     *
+     * @param text the text to analyse, may be null or empty
+     * @return word to occurrence count, in order of first appearance
+     */
+    static Map<String, Integer> countWords(String text) {
+        Map<String, Integer> count = new LinkedHashMap<String, Integer>();
+        if (text == null) {
+            return count;
+        }
+
+        for (String word : text.split(WORD_SEPARATORS)) {
+            // a leading separator yields one empty token, which is not a word
+            if (word.length() == 0) {
+                continue;
+            }
+            Integer seen = count.get(word);
+            count.put(word, seen == null ? 1 : seen + 1);
+        }
+        return count;
+    }
+
+    /**
+     * Reads output/ESV.txt and prints one "word,count" line per distinct word.
+     *
+     * @param args ignored
+     * @throws IOException if the file cannot be read
+     */
+    public static void main(String args[]) throws IOException {
+        String esv = FileUtils.readFileToString(new File("output/ESV.txt"));
+
+        for (Map.Entry<String, Integer> entry : countWords(esv).entrySet()) {
+            System.out.println(String.format("%s,%d", entry.getKey(), entry.getValue()));
+        }
+    }
+}
Added: trunk/StepTools/src/ScriptureReference.java
===================================================================
--- trunk/StepTools/src/ScriptureReference.java (rev 0)
+++ trunk/StepTools/src/ScriptureReference.java 2009-11-24 22:52:26 UTC (rev 26)
@@ -0,0 +1,67 @@
+/**
+ * An immutable scripture reference split into book, chapter and verse.
+ */
+public class ScriptureReference {
+    private final String book;
+    private final String chapter;
+    private final String verse;
+
+    /**
+     * Parses a key such as {@code 1 Samuel 2:13} (book, chapter and verse) or
+     * {@code Philemon 1} (book and verse only, no chapter).
+     * <p>
+     * The key is trimmed first. A colon only counts as the chapter/verse
+     * separator when it follows the last space; a key without any space is
+     * taken to be a bare book name, leaving chapter and verse {@code null}.
+     *
+     * @param key the human-readable reference to parse, must not be null
+     * @throws NullPointerException if {@code key} is null
+     */
+    public ScriptureReference(String key) {
+        if (key == null) {
+            throw new NullPointerException("key must not be null");
+        }
+
+        String trimmed = key.trim();
+        int lastColon = trimmed.lastIndexOf(':');
+        int lastSpace = trimmed.lastIndexOf(' ');
+
+        if (lastSpace == -1) {
+            // no separator at all: only a book name can be recovered
+            book = trimmed;
+            chapter = null;
+            verse = null;
+        } else if (lastColon > lastSpace) {
+            // "1 Samuel 2:13": the chapter sits between the space and the colon
+            book = trimmed.substring(0, lastSpace);
+            chapter = trimmed.substring(lastSpace + 1, lastColon);
+            verse = trimmed.substring(lastColon + 1);
+        } else {
+            // "Philemon 1": the trailing number is the verse, there is no chapter
+            book = trimmed.substring(0, lastSpace);
+            chapter = null;
+            verse = trimmed.substring(lastSpace + 1);
+        }
+    }
+
+    /**
+     * @return the book, never null
+     */
+    public String getBook() {
+        return book;
+    }
+
+    /**
+     * @return the chapter, or null when the key had none
+     */
+    public String getChapter() {
+        return chapter;
+    }
+
+    /**
+     * @return the verse, or null when the key was a bare book name
+     */
+    public String getVerse() {
+        return verse;
+    }
+}
Added: trunk/StepTools/src/log4j.properties
===================================================================
--- trunk/StepTools/src/log4j.properties (rev 0)
+++ trunk/StepTools/src/log4j.properties 2009-11-24 22:52:26 UTC (rev 26)
@@ -0,0 +1,9 @@
+# Set root logger level to WARN and its only appender to A1.
+log4j.rootLogger=WARN, A1
+
+# A1 is set to be a ConsoleAppender.
+log4j.appender.A1=org.apache.log4j.ConsoleAppender
+
+# A1 uses PatternLayout.
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout
+log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
More information about the Tynstep-svn
mailing list