[jsword-svn] r1055 - trunk/jsword/src/main/java/org/crosswire/jsword/examples
dmsmith at crosswire.org
dmsmith at crosswire.org
Wed Mar 22 14:38:22 MST 2006
Author: dmsmith
Date: 2006-03-22 14:38:14 -0700 (Wed, 22 Mar 2006)
New Revision: 1055
Modified:
trunk/jsword/src/main/java/org/crosswire/jsword/examples/BibleToOsis.java
Log:
KJV mod to osis example
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/examples/BibleToOsis.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/examples/BibleToOsis.java 2006-03-22 12:19:11 UTC (rev 1054)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/examples/BibleToOsis.java 2006-03-22 21:38:14 UTC (rev 1055)
@@ -66,7 +66,7 @@
*/
private static final String BIBLE_NAME = "KJV"; //$NON-NLS-1$
private static final String BIBLE_RANGE = "Gen-Rev"; //$NON-NLS-1$
- private static final boolean BY_CHAPTER = true;
+ private static final boolean BY_CHAPTER = false;
/**
* @param args
@@ -85,6 +85,7 @@
int lastChapter = -1;
StringBuffer buf = new StringBuffer();
boolean inPreVerse = false;
+ String bookTitle = ""; //$NON-NLS-1$
try
{
@@ -103,7 +104,22 @@
Verse verse = (Verse) key;
String raw = bible.getRawData(verse);
String osisID = verse.getOsisID();
+ Verse v = null;
+ try
+ {
+ v = VerseFactory.fromString(osisID);
+ }
+ catch (NoSuchVerseException e)
+ {
+ // does not happen
+ }
+
+// if (osisID.equals("Ps.72.1")) //$NON-NLS-1$
+// {
+// System.err.println(osisID + ':' + raw);
+// }
+
String currentBookName = BibleInfo.getOSISName(verse.getBook());
int currentChapter = verse.getChapter();
@@ -131,7 +147,8 @@
buf = new StringBuffer();
buildDocumentOpen(buf, bmd, currentBookName, BY_CHAPTER);
- buildBookOpen(buf, currentBookName);
+ buildBookOpen(buf, currentBookName, bookTitle);
+ bookTitle = ""; //$NON-NLS-1$
}
if (newBookFound || lastChapter != currentChapter)
@@ -210,6 +227,23 @@
inPreVerse = true;
}
+ // There is a bug in the KJV where NT book titles are at the end of the prior book
+ // And they contain junk!
+ if (SwordConstants.getTestament(v) == SwordConstants.TESTAMENT_NEW)
+ {
+ int start = raw.indexOf("<title"); //$NON-NLS-1$
+ if (start != -1)
+ {
+ int end = raw.indexOf("</title>", start); //$NON-NLS-1$
+ bookTitle = raw.substring(start, end + 8);
+ raw = raw.replace(bookTitle, ""); //$NON-NLS-1$
+ bookTitle = bookTitle.substring(bookTitle.indexOf('>') + 1, bookTitle.indexOf(" </title>")); //$NON-NLS-1$
+ bookTitle = bookTitle.replaceAll("<p/>", ""); //$NON-NLS-1$ //$NON-NLS-2$
+ bookTitle = bookTitle.replaceAll("</w>", ""); //$NON-NLS-1$ //$NON-NLS-2$
+ bookTitle = bookTitle.replaceAll("<w[^>]*>", ""); //$NON-NLS-1$ //$NON-NLS-2$
+ }
+ }
+
buildVerseOpen(buf, osisID);
buf.append(cleanup(osisID, raw, true));
buildVerseClose(buf, osisID);
@@ -363,6 +397,15 @@
docBuffer.append("\n <work osisWork=\"defaultReferenceScheme\">"); //$NON-NLS-1$
docBuffer.append("\n <refSystem>Bible.KJV</refSystem>"); //$NON-NLS-1$
docBuffer.append("\n </work>"); //$NON-NLS-1$
+ docBuffer.append("\n <work osisWork=\"strong\">"); //$NON-NLS-1$
+ docBuffer.append("\n <refSystem>Dict.Strongs</refSystem>"); //$NON-NLS-1$
+ docBuffer.append("\n </work>"); //$NON-NLS-1$
+ docBuffer.append("\n <work osisWork=\"robinson\">"); //$NON-NLS-1$
+ docBuffer.append("\n <refSystem>Dict.Robinsons</refSystem>"); //$NON-NLS-1$
+ docBuffer.append("\n </work>"); //$NON-NLS-1$
+ docBuffer.append("\n <work osisWork=\"strongMorph\">"); //$NON-NLS-1$
+ docBuffer.append("\n <refSystem>Dict.strongMorph</refSystem>"); //$NON-NLS-1$
+ docBuffer.append("\n </work>"); //$NON-NLS-1$
docBuffer.append("\n</header>"); //$NON-NLS-1$
docBuffer.append('\n');
MessageFormat msgFormat = new MessageFormat(docBuffer.toString()); //$NON-NLS-1$
@@ -377,10 +420,16 @@
}
}
- private void buildBookOpen(StringBuffer buf, String bookName)
+ private void buildBookOpen(StringBuffer buf, String bookName, String bookTitle)
{
MessageFormat msgFormat = new MessageFormat("<div type=\"book\" osisID=\"{0}\" canonical=\"true\">\n"); //$NON-NLS-1$
msgFormat.format(new Object[] { bookName}, buf, pos);
+
+ if (bookTitle.length() > 0)
+ {
+ MessageFormat titleFormat = new MessageFormat("<title type=\"main\">{0}</title>\n"); //$NON-NLS-1$
+ titleFormat.format(new Object[] { bookTitle }, buf, pos);
+ }
}
private void buildBookClose(StringBuffer buf)
@@ -474,8 +523,9 @@
System.err.println("This was unexpected!"); //$NON-NLS-1$
break;
}
+ String content = XMLUtil.escape(unescape(badNoteMatcher.group(4)));
input = input.replace(badNoteMatcher.group(),
- noteCleanupFormat.format(new Object[] { badNoteMatcher.group(2), badNoteMatcher.group(3), XMLUtil.escape(unescape(badNoteMatcher.group(4)))}));
+ noteCleanupFormat.format(new Object[] { badNoteMatcher.group(2), badNoteMatcher.group(3), content}));
}
else
{
@@ -511,6 +561,22 @@
}
}
+ // Add in missing w
+ if (osisID.equals("1Cor.16.24")) //$NON-NLS-1$
+ {
+ input += "<w src=\"15\" lemma=\"strong:G575\" morph=\"robinson:PREP\"></w><w src=\"11\" lemma=\"strong:G4314\" morph=\"robinson:PREP\"></w><w src=\"12\" lemma=\"strong:G2881\" morph=\"robinson:A-APM\"></w>"; //$NON-NLS-1$
+ }
+
+ if (osisID.equals("2Cor.13.14")) //$NON-NLS-1$
+ {
+ input += "<w src=\"26\" lemma=\"strong:G575\" morph=\"robinson:PREP\"></w><w src=\"22\" lemma=\"strongs:G4314\" morph=\"robinson:PREP\"></w>"; //$NON-NLS-1$
+ }
+
+ if (osisID.equals("1Thess.5.28")) //$NON-NLS-1$
+ {
+ input += "<w src=\"11\" lemma=\"strong:G4314\" morph=\"robinson:PREP\"></w><w src=\"12\" lemma=\"strong:G2331\" morph=\"robinson:N-APM\"></w>"; //$NON-NLS-1$
+ }
+
Set<Integer> before = new TreeSet<Integer>();
// Fix up bad w tags
@@ -553,13 +619,44 @@
i++;
}
+ input = input.replaceAll("changeType=\"", "type=\""); //$NON-NLS-1$ //$NON-NLS-2$
+
+ if (osisID.startsWith("Ps")) //$NON-NLS-1$
+ {
+ Matcher matcher = transChangeSegPattern.matcher(input);
+ while (matcher.find())
+ {
+ String replace = "<transChange type=\"added\">" + matcher.group(1) + "</transChange>"; //$NON-NLS-1$ //$NON-NLS-2$
+ input = input.replace(matcher.group(), replace);
+// System.err.println(osisID + " replace |" + matcher.group() + "| with |" + replace + '|'); //$NON-NLS-1$ //$NON-NLS-2$
+ }
+ }
input = input.replaceAll("\"transChange\"", "\"x-transChange\""); //$NON-NLS-1$ //$NON-NLS-2$
- input = input.replaceAll("\"type:", "\"x-"); //$NON-NLS-1$ //$NON-NLS-2$
- input = input.replaceAll("changeType=\"", "type=\""); //$NON-NLS-1$ //$NON-NLS-2$
+ input = input.replaceAll("type:", "x-"); //$NON-NLS-1$ //$NON-NLS-2$
+
+// if (input.contains(transSegStart))
+// {
+// Matcher transSegStartMatcher = transSegStartPattern.matcher(input);
+// if (transSegStartMatcher.find())
+// {
+// int start = transSegStartMatcher.start();
+// Matcher transSegEndMatcher = transSegEndPattern.matcher(input);
+// if (transSegEndMatcher.find(1 + transSegStartMatcher.end()))
+// {
+// int end = transSegEndMatcher.end();
+// String transSegText = input.substring(start, end);
+// transSegText = transSegText.substring(transSegStart.length(), transSegText.length() - transSegEnd.length());
+//// if (transSegText.indexOf('<') != -1 || transSegText.indexOf('>') != -1)
+// {
+// System.out.println(osisID + " found transseg " + transSegText + "\n\t" + orig); //$NON-NLS-1$ //$NON-NLS-2$
+// }
+// }
+// }
+// }
+
+
+
input = input.replaceAll("x-StudyNote", "study"); //$NON-NLS-1$ //$NON-NLS-2$
- input = input.replaceAll("\\s+</q>", "</q>"); //$NON-NLS-1$ //$NON-NLS-2$
- input = input.replaceAll("\\s+</transChange>", "</transChange> "); //$NON-NLS-1$ //$NON-NLS-2$
- input = input.replaceAll("<transChange type=\"added\">\\s+", " <transChange type=\"added\">"); //$NON-NLS-1$ //$NON-NLS-2$
// normalize paragraph markers and move them from the end of a verse to the beginning of the next
input = input.replaceAll("<milestone type=\"x-p\"\\s*/>", "<milestone type=\"x-p\" marker=\"\u00B6\"/>"); //$NON-NLS-1$ //$NON-NLS-2$
@@ -1479,38 +1576,48 @@
// System.err.println(osisID + " replace |" + matcher.group() + "| with |" + replace + '|'); //$NON-NLS-1$ //$NON-NLS-2$
}
- matcher = w2Pattern.matcher(input);
+ matcher = w4Pattern.matcher(input);
while (matcher.find())
{
- String replace = ") "; //$NON-NLS-1$
+ String replace = " "; //$NON-NLS-1$
input = input.replace(matcher.group(), replace);
// System.err.println(osisID + " replace |" + matcher.group() + "| with |" + replace + '|'); //$NON-NLS-1$ //$NON-NLS-2$
}
- matcher = w3Pattern.matcher(input);
+ matcher = w5Pattern.matcher(input);
while (matcher.find())
{
- String replace = " "; //$NON-NLS-1$
+ String replace = matcher.group(2) + matcher.group(1); //$NON-NLS-1$
input = input.replace(matcher.group(), replace);
// System.err.println(osisID + " replace |" + matcher.group() + "| with |" + replace + '|'); //$NON-NLS-1$ //$NON-NLS-2$
}
- matcher = w4Pattern.matcher(input);
+ matcher = w6Pattern.matcher(input);
+ if (matcher.find())
+ {
+ String replace = matcher.group(2) + matcher.group(1);
+ input = input.replace(matcher.group(), replace);
+// System.err.println(osisID + " replace |" + matcher.group() + "| with |" + replace + '|'); //$NON-NLS-1$ //$NON-NLS-2$
+ }
+
+ matcher = w2Pattern.matcher(input);
while (matcher.find())
{
- String replace = "</w>" + matcher.group(1); //$NON-NLS-1$
+ String replace = ") "; //$NON-NLS-1$
input = input.replace(matcher.group(), replace);
// System.err.println(osisID + " replace |" + matcher.group() + "| with |" + replace + '|'); //$NON-NLS-1$ //$NON-NLS-2$
}
- matcher = w5Pattern.matcher(input);
- if (matcher.find())
+ matcher = w3Pattern.matcher(input);
+ while (matcher.find())
{
- String replace = matcher.group(2) + matcher.group(1);
+ String replace = " ("; //$NON-NLS-1$
input = input.replace(matcher.group(), replace);
// System.err.println(osisID + " replace |" + matcher.group() + "| with |" + replace + '|'); //$NON-NLS-1$ //$NON-NLS-2$
}
+ input = input.replaceAll("\\s+</q>", "</q>"); //$NON-NLS-1$ //$NON-NLS-2$
+
// strip trailing spaces
int length = input.length();
int here = length;
@@ -1528,7 +1635,7 @@
// }
}
- matcher = w6Pattern.matcher(input);
+ matcher = w7Pattern.matcher(input);
while (matcher.find())
{
String replace = matcher.group(2) + matcher.group(1);
@@ -1537,6 +1644,22 @@
matcher.reset(input);
}
+ matcher = w8Pattern.matcher(input);
+ while (matcher.find())
+ {
+ String replace = matcher.group(1);
+ input = input.replace(matcher.group(), replace);
+// System.err.println(osisID + " replace |" + matcher.group() + "| with |" + replace + '|'); //$NON-NLS-1$ //$NON-NLS-2$
+ }
+
+ matcher = w9Pattern.matcher(input);
+ while (matcher.find())
+ {
+ String replace = matcher.group(2) + matcher.group(1);
+ input = input.replace(matcher.group(), replace);
+// System.err.println(osisID + " replace |" + matcher.group() + "| with |" + replace + '|'); //$NON-NLS-1$ //$NON-NLS-2$
+ }
+
// strip leading spaces
here = 0;
while (input.charAt(here) == ' ')
@@ -1573,6 +1696,9 @@
private static Pattern psalmTitleStartPattern = Pattern.compile(psalmTitleStart);
private static Pattern psalmTitleEndPattern = Pattern.compile(psalmTitleEnd); //$NON-NLS-1$
+ private static String transChangeSeg = "<seg type=\"transChange\" subType=\"type:added\">([^<]*)</seg>"; //$NON-NLS-1$
+ private static Pattern transChangeSegPattern = Pattern.compile(transChangeSeg);
+
private static String badNote = "<note type=\"[^\"]*\" (name=\"([^\"]*)\" date=\"([^\"]*)\"/)>([^<]*)</note>"; //$NON-NLS-1$
private static Pattern badNotePattern = Pattern.compile(badNote);
@@ -1608,12 +1734,14 @@
// private static Pattern axPattern = Pattern.compile(".....[sS]'[^sS< \\.].........."); //$NON-NLS-1$
private static Pattern w1Pattern = Pattern.compile("\\s([,;:.?!])"); //$NON-NLS-1$
+ private static Pattern w4Pattern = Pattern.compile("[\n\r\t]"); //$NON-NLS-1$
+ private static Pattern w5Pattern = Pattern.compile("([!\"#$%&()*+,-./:;=?@^_`{|}~ ]+)(</w>|</transChange>)"); //$NON-NLS-1$
+ private static Pattern w6Pattern = Pattern.compile("(<w\\s[^>]*>|<transChange\\s[^>]*>)([!\"#$%&'()*+,-./:;=?@^_`{|}~ ]+)"); //$NON-NLS-1$
+ private static Pattern w7Pattern = Pattern.compile("(<w\\s[^>]*></w>)([!\"#$%&'()*+,-./:;=?@^_`{|}~ ]+)"); //$NON-NLS-1$
private static Pattern w2Pattern = Pattern.compile("\\s\\)"); //$NON-NLS-1$
- private static Pattern w3Pattern = Pattern.compile("[\n\r\t]"); //$NON-NLS-1$
- private static Pattern w4Pattern = Pattern.compile("(\\{Punct}|\\s)+</w>"); //$NON-NLS-1$
- private static Pattern wxxPattern = Pattern.compile("([!\"#$%&'()*+,-./:;=?@^_`{|}~])"); //$NON-NLS-1$
- private static Pattern w5Pattern = Pattern.compile("(<w\\s[^>]*>)([!\"#$%&'()*+,-./:;=?@^_`{|}~ ]+)"); //$NON-NLS-1$
- private static Pattern w6Pattern = Pattern.compile("(<w\\s[^>]*></w>)([!\"#$%&'()*+,-./:;=?@^_`{|}~ ]+)"); //$NON-NLS-1$
+ private static Pattern w3Pattern = Pattern.compile("\\(\\s"); //$NON-NLS-1$
+ private static Pattern w8Pattern = Pattern.compile("(<milestone type=\"x-p\" marker=\"\u00B6\"/>)\\s+"); //$NON-NLS-1$
+ private static Pattern w9Pattern = Pattern.compile("(<title\\s[^>]*>)\\s+"); //$NON-NLS-1$
private static Pattern wnPattern = Pattern.compile("\\s\\s+"); //$NON-NLS-1$
private boolean moveP = false;
More information about the jsword-svn mailing list