[jsword-svn] r1185 - in trunk: common/src/main/java/org/crosswire/common/xml jsword/src/main/java/org/crosswire/jsword/book jsword/src/main/java/org/crosswire/jsword/book/basic jsword/src/main/java/org/crosswire/jsword/book/filter jsword/src/main/java/org/crosswire/jsword/book/filter/gbf jsword/src/main/java/org/crosswire/jsword/book/filter/osis jsword/src/main/java/org/crosswire/jsword/book/filter/plaintext jsword/src/main/java/org/crosswire/jsword/book/filter/thml jsword/src/main/java/org/crosswire/jsword/book/sword jsword/src/test/java jsword/src/test/java/org/crosswire/jsword/book jsword-limbo/src/main/java/org/crosswire/jsword/book/stub jsword-web/src/web
dmsmith at crosswire.org
dmsmith at crosswire.org
Mon Nov 13 06:32:18 MST 2006
Author: dmsmith
Date: 2006-11-13 06:32:18 -0700 (Mon, 13 Nov 2006)
New Revision: 1185
Modified:
trunk/common/src/main/java/org/crosswire/common/xml/XMLUtil.java
trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/stub/StubDictionary.java
trunk/jsword-web/src/web/change.html
trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/basic/AbstractPassageBook.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/Filter.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/FilterFactory.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/gbf/GBFFilter.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/gbf/GBFTagBuilders.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/osis/OSISFilter.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/plaintext/PlainTextFilter.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ATag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/AnonymousTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/BTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/BigTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/BlockquoteTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/BrTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/CenterTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/CitationTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ColTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/CustomHandler.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/DivTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/FontTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ForeignTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ITag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ImgTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/LiTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/NameTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/NoteTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/OlTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/PTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/RowTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ScripRefTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ScriptureTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/SmallTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/SubTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/SupTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/SyncTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/THMLFilter.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TableTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TdTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TermTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ThTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TrTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TtTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/UTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/UlTag.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/ConfigEntryTable.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/RawBackend.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/RawLDBackend.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/SwordDictionary.java
trunk/jsword/src/test/java/JSwordAllTests.java
trunk/jsword/src/test/java/org/crosswire/jsword/book/ReadEverything.java
Log:
Improved handling of entities in XML.
Improved cleanup of bad xml.
Improved error messages to include book and key.
Modified: trunk/common/src/main/java/org/crosswire/common/xml/XMLUtil.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/xml/XMLUtil.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/common/src/main/java/org/crosswire/common/xml/XMLUtil.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -23,6 +23,10 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
import java.util.regex.Pattern;
import org.crosswire.common.util.FileUtil;
@@ -41,6 +45,7 @@
* @see gnu.lgpl.License for license details.<br>
* The copyright to this program is held by it's authors.
* @author Joe Walker [joe at eireneh dot com]
+ * @author DM Smith [dmsmith555 at yahoo dot com]
*/
public final class XMLUtil
{
@@ -135,8 +140,12 @@
/**
- * A parse has failed so we can try to kill the broken entities and then
- * have another go.
+ * For each entity in the input that is not allowed in XML, replace the entity with its unicode equivalent or remove it.
+ * For each instance of a bare &, replace it with &amp;<br/>
+ * XML only allows 4 entities: &amp;, &quot;, &lt; and &gt;.
+ *
+ * @param broken the string to handle entities
+ * @return the string with entities appropriately fixed up
*/
public static String cleanAllEntities(String broken)
{
@@ -165,44 +174,42 @@
continue;
}
- // Check for chars that should not be in an entity name
int i = amp + 1;
while (true)
{
- // if we are at the end of the string the discard from the & on
+ // if we are at the end of the string then just escape the '&';
if (i >= working.length())
{
- String entity = working.substring(amp);
- String replace = guessEntity(entity);
+ //String entity = working.substring(amp);
+ //String replace = guessEntity(entity);
//DataPolice.report("replacing unterminated entity: '" + entity + "' with: '" + replace + "'");
- working = working.substring(0, amp) + replace;
- break;
+ return working.substring(0, amp) + "&amp;" + working.substring(amp + 1); //$NON-NLS-1$
}
- // if we have come to an ; then we just have an entity that isn't
- // properly declared, (or maybe it is but something else is
- // broken) so discard it
+ // if we have come to a ; then we have an entity
+ // If it is something that xml can't handle then replace it.
char c = working.charAt(i);
if (c == ';')
{
String entity = working.substring(amp, i + 1);
- String replace = guessEntity(entity);
- //DataPolice.report("replacing entity: '" + entity + "' with: '" + replace + "'");
+ String replace = handleEntity(entity);
+ //log.warn("replacing entity: '" + entity + "' with: '" + replace + "'");
working = working.substring(0, amp) + replace + working.substring(i + 1);
break;
}
- // XML entities are letters, numbers or -????
- // If we find something else then dump the entity
- if (!Character.isLetterOrDigit(c) && c != '-')
+ // Did we end an entity without finding a closing ;
+ // Then treat it as an '&' that needs to be replaced with &amp;
+ if (!Character.isLetterOrDigit(c))
{
- String entity = working.substring(amp, i);
- String replace = guessEntity(entity);
- //DataPolice.report("replacing invalid entity: '" + entity + "' with: '" + replace + "'");
+ //String entity = working.substring(amp, i);
+ //String replace = "&amp;" + working.substring(amp + 1, i);
+ //log.warn("replacing invalid entity: '" + entity + "' with: '" + replace + "': " + broken);
- working = working.substring(0, amp) + replace + working.substring(i);
+ working = working.substring(0, amp) + "&amp;" + working.substring(amp + 1); //$NON-NLS-1$
+ amp = i + 4; // account for the 4 extra characters
break;
}
@@ -216,75 +223,18 @@
}
/**
- * Attempt to guess what the entity should have been and fix it, or remove
- * it if there are no obvious replacements.
+ * Remove all invalid characters in the input.
+ * XML has stringent requirements as to which characters are or are not allowed.
+ * The set of allowable characters are:<br />
+ * #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]<br/>
+ * Note: Java handles to \uFFFF
+ *
+ * @param broken the string to be cleaned
+ * @return the cleaned string
*/
- private static String guessEntity(String brokenEntity)
+ public static String cleanAllCharacters(String broken)
{
- String broken = brokenEntity;
- // strip any beginning & or ending ;
- if (broken.endsWith(";")) //$NON-NLS-1$
- {
- broken = broken.substring(0, broken.length() - 1);
- }
- if (broken.charAt(0) == '&')
- {
- broken = broken.substring(1);
- }
-
- // pre-defined XML entities
- if ("amp".equals(broken)) //$NON-NLS-1$
- {
- return "&"; //$NON-NLS-1$
- }
- if ("lt".equals(broken)) //$NON-NLS-1$
- {
- return "<"; //$NON-NLS-1$
- }
- if ("gt".equals(broken)) //$NON-NLS-1$
- {
- return ">"; //$NON-NLS-1$
- }
- if ("quot".equals(broken)) //$NON-NLS-1$
- {
- return """; //$NON-NLS-1$
- }
-
- // common HTML entities
- if ("nbsp".equals(broken)) //$NON-NLS-1$
- {
- return " "; //$NON-NLS-1$
- }
- if ("pound".equals(broken)) //$NON-NLS-1$
- {
- return "£"; //$NON-NLS-1$
- }
- if ("yen".equals(broken)) //$NON-NLS-1$
- {
- return "¥"; //$NON-NLS-1$
- }
- if ("euro".equals(broken)) //$NON-NLS-1$
- {
- return "€"; //$NON-NLS-1$
- }
- if ("copy".equals(broken)) //$NON-NLS-1$
- {
- return "©"; //$NON-NLS-1$
- }
- if ("para".equals(broken)) //$NON-NLS-1$
- {
- return "¶"; //$NON-NLS-1$
- }
- if ("lsquo".equals(broken)) //$NON-NLS-1$
- {
- return "‘"; //$NON-NLS-1$
- }
- if ("rsquo".equals(broken)) //$NON-NLS-1$
- {
- return "’"; //$NON-NLS-1$
- }
-
- return ""; //$NON-NLS-1$
+ return invalidCharacterPattern.matcher(broken).replaceAll(""); //$NON-NLS-1$;
}
/**
@@ -371,9 +321,157 @@
}
/**
+ * Replace entity with its unicode equivalent, if it is not a valid XML entity.
+ * Otherwise strip it out.
+ * XML only allows 4 entities: &amp;, &quot;, &lt; and &gt;.
+ *
+ * @param entity the entity to be replaced
+ * @return the substitution for the entity, either itself, the unicode equivalent or an empty string.
+ */
+ private static String handleEntity(String entity)
+ {
+ if (goodEntities.contains(entity))
+ {
+ return entity;
+ }
+
+ String replace = (String) badEntities.get(entity);
+ if (replace != null)
+ {
+ return replace;
+ }
+
+ return ""; //$NON-NLS-1$
+ }
+
+ // Map entities to their unicode equivalent
+ static Set goodEntities = new HashSet();
+ static Map badEntities = new HashMap();
+ static
+ {
+ // pre-defined XML entities
+ goodEntities.add("&quot;"); //$NON-NLS-1$ // quotation mark
+ goodEntities.add("&amp;"); //$NON-NLS-1$ // ampersand
+ goodEntities.add("&lt;"); //$NON-NLS-1$ // less-than sign
+ goodEntities.add("&gt;"); //$NON-NLS-1$ // greater-than sign
+
+ // misc entities
+ badEntities.put("&euro;", "\u20AC"); //$NON-NLS-1$ //$NON-NLS-2$ // euro
+ badEntities.put("&lsquo;", "\u2018"); //$NON-NLS-1$ //$NON-NLS-2$ // left single quotation mark
+ badEntities.put("&rsquo;", "\u2019"); //$NON-NLS-1$ //$NON-NLS-2$ // right single quotation mark
+
+ // Latin 1 entities
+ badEntities.put("&nbsp;", "\u00A0"); //$NON-NLS-1$ //$NON-NLS-2$ // no-break space
+ badEntities.put("&iexcl;", "\u00A1"); //$NON-NLS-1$ //$NON-NLS-2$ // inverted exclamation mark
+ badEntities.put("&cent;", "\u00A2"); //$NON-NLS-1$ //$NON-NLS-2$ // cent sign
+ badEntities.put("&pound;", "\u00A3"); //$NON-NLS-1$ //$NON-NLS-2$ // pound sign
+ badEntities.put("&curren;", "\u00A4"); //$NON-NLS-1$ //$NON-NLS-2$ // currency sign
+ badEntities.put("&yen;", "\u00A5"); //$NON-NLS-1$ //$NON-NLS-2$ // yen sign
+ badEntities.put("&brvbar;", "\u00A6"); //$NON-NLS-1$ //$NON-NLS-2$ // broken vertical bar
+ badEntities.put("&sect;", "\u00A7"); //$NON-NLS-1$ //$NON-NLS-2$ // section sign
+ badEntities.put("&uml;", "\u00A8"); //$NON-NLS-1$ //$NON-NLS-2$ // diaeresis
+ badEntities.put("&copy;", "\u00A9"); //$NON-NLS-1$ //$NON-NLS-2$ // copyright sign
+ badEntities.put("&ordf;", "\u00AA"); //$NON-NLS-1$ //$NON-NLS-2$ // feminine ordinal indicator
+ badEntities.put("&laquo;", "\u00AB"); //$NON-NLS-1$ //$NON-NLS-2$ // left-pointing double angle quotation mark
+ badEntities.put("&not;", "\u00AC"); //$NON-NLS-1$ //$NON-NLS-2$ // not sign
+ badEntities.put("&shy;", "\u00AD"); //$NON-NLS-1$ //$NON-NLS-2$ // soft hyphen
+ badEntities.put("&reg;", "\u00AE"); //$NON-NLS-1$ //$NON-NLS-2$ // registered sign
+ badEntities.put("&macr;", "\u00AF"); //$NON-NLS-1$ //$NON-NLS-2$ // macron
+ badEntities.put("&deg;", "\u00B0"); //$NON-NLS-1$ //$NON-NLS-2$ // degree sign
+ badEntities.put("&plusmn;", "\u00B1"); //$NON-NLS-1$ //$NON-NLS-2$ // plus-minus sign
+ badEntities.put("&sup2;", "\u00B2"); //$NON-NLS-1$ //$NON-NLS-2$ // superscript two
+ badEntities.put("&sup3;", "\u00B3"); //$NON-NLS-1$ //$NON-NLS-2$ // superscript three
+ badEntities.put("&acute;", "\u00B4"); //$NON-NLS-1$ //$NON-NLS-2$ // acute accent
+ badEntities.put("&micro;", "\u00B5"); //$NON-NLS-1$ //$NON-NLS-2$ // micro sign
+ badEntities.put("&para;", "\u00B6"); //$NON-NLS-1$ //$NON-NLS-2$ // pilcrow sign
+ badEntities.put("&middot;", "\u00B7"); //$NON-NLS-1$ //$NON-NLS-2$ // middle dot
+ badEntities.put("&cedil;", "\u00B8"); //$NON-NLS-1$ //$NON-NLS-2$ // cedilla
+ badEntities.put("&sup1;", "\u00B9"); //$NON-NLS-1$ //$NON-NLS-2$ // superscript one
+ badEntities.put("&ordm;", "\u00BA"); //$NON-NLS-1$ //$NON-NLS-2$ // masculine ordinal indicator
+ badEntities.put("&raquo;", "\u00BB"); //$NON-NLS-1$ //$NON-NLS-2$ // right-pointing double angle quotation mark
+ badEntities.put("&frac14;", "\u00BC"); //$NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one quarter
+ badEntities.put("&frac12;", "\u00BD"); //$NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one half
+ badEntities.put("&frac34;", "\u00BE"); //$NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction three quarters
+ badEntities.put("&iquest;", "\u00BF"); //$NON-NLS-1$ //$NON-NLS-2$ // inverted question mark
+ badEntities.put("&Agrave;", "\u00C0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with grave
+ badEntities.put("&Aacute;", "\u00C1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with acute
+ badEntities.put("&Acirc;", "\u00C2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with circumflex
+ badEntities.put("&Atilde;", "\u00C3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with tilde
+ badEntities.put("&Auml;", "\u00C4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with diaeresis
+ badEntities.put("&Aring;", "\u00C5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with ring above
+ badEntities.put("&AElig;", "\u00C6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter AE
+ badEntities.put("&Ccedil;", "\u00C7"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter C with cedilla
+ badEntities.put("&Egrave;", "\u00C8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with grave
+ badEntities.put("&Eacute;", "\u00C9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with acute
+ badEntities.put("&Ecirc;", "\u00CA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with circumflex
+ badEntities.put("&Euml;", "\u00CB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with diaeresis
+ badEntities.put("&Igrave;", "\u00CC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with grave
+ badEntities.put("&Iacute;", "\u00CD"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with acute
+ badEntities.put("&Icirc;", "\u00CE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with circumflex
+ badEntities.put("&Iuml;", "\u00CF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with diaeresis
+ badEntities.put("&ETH;", "\u00D0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter ETH
+ badEntities.put("&Ntilde;", "\u00D1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter N with tilde
+ badEntities.put("&Ograve;", "\u00D2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with grave
+ badEntities.put("&Oacute;", "\u00D3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with acute
+ badEntities.put("&Ocirc;", "\u00D4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with circumflex
+ badEntities.put("&Otilde;", "\u00D5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with tilde
+ badEntities.put("&Ouml;", "\u00D6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with diaeresis
+ badEntities.put("&times;", "\u00D7"); //$NON-NLS-1$ //$NON-NLS-2$ // multiplication sign
+ badEntities.put("&Oslash;", "\u00D8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with stroke
+ badEntities.put("&Ugrave;", "\u00D9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with grave
+ badEntities.put("&Uacute;", "\u00DA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with acute
+ badEntities.put("&Ucirc;", "\u00DB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with circumflex
+ badEntities.put("&Uuml;", "\u00DC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with diaeresis
+ badEntities.put("&Yacute;", "\u00DD"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter Y with acute
+ badEntities.put("&THORN;", "\u00DE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter THORN
+ badEntities.put("&szlig;", "\u00DF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter sharp s
+ badEntities.put("&agrave;", "\u00E0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with grave
+ badEntities.put("&aacute;", "\u00E1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with acute
+ badEntities.put("&acirc;", "\u00E2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with circumflex
+ badEntities.put("&atilde;", "\u00E3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with tilde
+ badEntities.put("&auml;", "\u00E4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with diaeresis
+ badEntities.put("&aring;", "\u00E5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with ring above
+ badEntities.put("&aelig;", "\u00E6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter ae
+ badEntities.put("&ccedil;", "\u00E7"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter c with cedilla
+ badEntities.put("&egrave;", "\u00E8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with grave
+ badEntities.put("&eacute;", "\u00E9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with acute
+ badEntities.put("&ecirc;", "\u00EA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with circumflex
+ badEntities.put("&euml;", "\u00EB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with diaeresis
+ badEntities.put("&igrave;", "\u00EC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with grave
+ badEntities.put("&iacute;", "\u00ED"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with acute
+ badEntities.put("&icirc;", "\u00EE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with circumflex
+ badEntities.put("&iuml;", "\u00EF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with diaeresis
+ badEntities.put("&eth;", "\u00F0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter eth
+ badEntities.put("&ntilde;", "\u00F1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter n with tilde
+ badEntities.put("&ograve;", "\u00F2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with grave
+ badEntities.put("&oacute;", "\u00F3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with acute
+ badEntities.put("&ocirc;", "\u00F4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with circumflex
+ badEntities.put("&otilde;", "\u00F5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with tilde
+ badEntities.put("&ouml;", "\u00F6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with diaeresis
+ badEntities.put("&divide;", "\u00F7"); //$NON-NLS-1$ //$NON-NLS-2$ // division sign
+ badEntities.put("&oslash;", "\u00F8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with stroke
+ badEntities.put("&ugrave;", "\u00F9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with grave
+ badEntities.put("&uacute;", "\u00FA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with acute
+ badEntities.put("&ucirc;", "\u00FB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with circumflex
+ badEntities.put("&uuml;", "\u00FC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with diaeresis
+ badEntities.put("&yacute;", "\u00FD"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with acute
+ badEntities.put("&thorn;", "\u00FE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter thorn
+ badEntities.put("&yuml;", "\u00FF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with diaeresis
+ }
+
+ /**
* The log stream
*/
private static final Logger log = Logger.getLogger(XMLUtil.class);
+ /**
+ * Pattern for numeric entities.
+ */
private static Pattern validCharacterEntityPattern = Pattern.compile("^&#x?\\d{2,4};"); //$NON-NLS-1$
+
+ /**
+ * Pattern that negates the allowable XML unicode characters in the range of \u0000-\uFFFF.
+ * Valid are: #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+ */
+ private static Pattern invalidCharacterPattern = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); //$NON-NLS-1$
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -250,7 +250,9 @@
public static final String ATTRIBUTE_SPEAKER_WHO = "who"; //$NON-NLS-1$
public static final String ATTRIBUTE_W_MORPH = "morph"; //$NON-NLS-1$
public static final String ATTRIBUTE_OSISTEXT_OSISIDWORK = "osisIDWork"; //$NON-NLS-1$
- public static final String OSIS_ATTR_LANG = "xml:lang"; //$NON-NLS-1$
+ // OSIS defines the long attribute as the one from the xml namespace
+ // Typical usage element.setAttribute(OSISUtil.OSIS_ATTR_LANG, lang, Namespace.XML_NAMESPACE);
+ public static final String OSIS_ATTR_LANG = "lang"; //$NON-NLS-1$
public static final String ATTRIBUTE_DIV_BOOK = "book"; //$NON-NLS-1$
/**
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/basic/AbstractPassageBook.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/basic/AbstractPassageBook.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/basic/AbstractPassageBook.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -94,7 +94,7 @@
// If the verse is empty then we shouldn't add the verse tag
if (txt.length() > 0)
{
- List osisContent = getFilter().toOSIS(verse, txt);
+ List osisContent = getFilter().toOSIS(this, verse, txt);
addOSIS(verse, div, osisContent);
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/Filter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/Filter.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/Filter.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -23,6 +23,7 @@
import java.util.List;
+import org.crosswire.jsword.book.Book;
import org.crosswire.jsword.passage.Key;
/**
@@ -32,7 +33,7 @@
* The copyright to this program is held by it's authors.
* @author Joe Walker [joe at eireneh dot com]
*/
-public interface Filter
+public interface Filter extends Cloneable
{
/**
* Converter from plain (encoded) text to OSIS data
@@ -40,5 +41,12 @@
* @param plain The encoded text
* @return a List of OSIS Elements
*/
- List toOSIS(Key key, String plain) throws FilterException;
+ List toOSIS(Book book, Key key, String plain) throws FilterException;
+
+ /**
+ * This needs to be declared here so that it is visible as a method
+ * on a derived Key.
+ * @return A complete copy of ourselves
+ */
+ Object clone();
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/FilterFactory.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/FilterFactory.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/FilterFactory.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -131,7 +131,7 @@
reply = deft;
}
- return reply;
+ return (Filter) reply.clone();
}
/**
@@ -139,7 +139,7 @@
*/
public static Filter getDefaultFilter()
{
- return deft;
+ return (Filter) deft.clone();
}
/**
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/gbf/GBFFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/gbf/GBFFilter.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/gbf/GBFFilter.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -25,6 +25,7 @@
import java.util.LinkedList;
import java.util.List;
+import org.crosswire.jsword.book.Book;
import org.crosswire.jsword.book.DataPolice;
import org.crosswire.jsword.book.OSISUtil;
import org.crosswire.jsword.book.filter.Filter;
@@ -45,16 +46,16 @@
public class GBFFilter implements Filter
{
/* (non-Javadoc)
- * @see org.crosswire.jsword.book.filter.Filter#toOSIS(org.crosswire.jsword.book.filter.BookDataListener, java.lang.String)
+ * @see org.crosswire.jsword.book.filter.Filter#toOSIS(org.crosswire.jsword.book.Book, org.crosswire.jsword.passage.Key, java.lang.String)
*/
- public List toOSIS(Key key, String plain) throws FilterException
+ public List toOSIS(Book book, Key key, String plain) throws FilterException
{
DataPolice.setKey(key);
Element ele = OSISUtil.factory().createDiv();
LinkedList stack = new LinkedList();
stack.addFirst(ele);
- List taglist = parseTags(plain.trim());
+ List taglist = parseTags(book, key, plain.trim());
while (true)
{
if (taglist.isEmpty())
@@ -71,11 +72,27 @@
return ele.removeContent();
}
+ /* (non-Javadoc)
+ * @see java.lang.Object#clone()
+ */
+ public Object clone()
+ {
+ try
+ {
+ return super.clone();
+ }
+ catch (CloneNotSupportedException e)
+ {
+ assert false : e;
+ }
+ return null;
+ }
+
/**
* Turn the string into a list of tags in the order that they appear in the
* original string.
*/
- private List parseTags(String aRemains)
+ private List parseTags(Book book, Key key, String aRemains)
{
String remains = aRemains;
List taglist = new ArrayList();
@@ -96,7 +113,7 @@
// check that we don't have unmatched tags
if (ltpos == -1 || gtpos == -1)
{
- DataPolice.report("ignoring unmatched '<' or '>' in gbf: " + remains); //$NON-NLS-1$
+ DataPolice.report("In " + book.getInitials() + "(" + key.getName() + ") ignoring unmatched '<' or '>' in gbf: " + remains); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
taglist.add(GBFTagBuilders.getTextTag(remains));
remains = null;
break;
@@ -105,7 +122,7 @@
// check that the tags are in a sensible order
if (ltpos > gtpos)
{
- DataPolice.report("ignoring transposed '<' or '>' in gbf: " + remains); //$NON-NLS-1$
+ DataPolice.report("In " + book.getInitials() + "(" + key.getName() + ") ignoring transposed '<' or '>' in gbf: " + remains); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
taglist.add(GBFTagBuilders.getTextTag(remains));
remains = null;
break;
@@ -142,7 +159,7 @@
int length = tag.length();
if (length > 0)
{
- Tag reply = GBFTagBuilders.getTag(tag);
+ Tag reply = GBFTagBuilders.getTag(book, key, tag);
if (reply != null)
{
taglist.add(reply);
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/gbf/GBFTagBuilders.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/gbf/GBFTagBuilders.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/gbf/GBFTagBuilders.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -25,6 +25,7 @@
import java.util.Map;
import org.crosswire.common.util.Logger;
+import org.crosswire.jsword.book.Book;
import org.crosswire.jsword.book.filter.gbf.GBFTags.BoldStartTag;
import org.crosswire.jsword.book.filter.gbf.GBFTags.CrossRefStartTag;
import org.crosswire.jsword.book.filter.gbf.GBFTags.DefaultEndTag;
@@ -46,6 +47,7 @@
import org.crosswire.jsword.book.filter.gbf.GBFTags.TextTag;
import org.crosswire.jsword.book.filter.gbf.GBFTags.TitleStartTag;
import org.crosswire.jsword.book.filter.gbf.GBFTags.UnderlineStartTag;
+import org.crosswire.jsword.passage.Key;
/**
* This class is a convienence to get GBF Tags.
@@ -71,7 +73,7 @@
* @param name
* @return return a GBF Tag for the given tag name
*/
- public static Tag getTag(String name)
+ public static Tag getTag(Book book, Key key, String name)
{
Tag tag = null;
int length = name.length();
@@ -99,7 +101,7 @@
{
// I'm not confident enough that we handle all the GBF tags
// that I will blame the book instead of the program
- log.warn("Ignoring tag of <" + name + ">"); //$NON-NLS-1$ //$NON-NLS-2$
+ log.warn("In " + book.getInitials() + "(" + key.getName() + ") ignoring tag of <" + name + ">"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
//DataPolice.report("Ignoring tag of <" + name + ">");
}
}
@@ -173,6 +175,27 @@
}
/**
+ *
+ */
+ static final class EscapeTagBuilder implements TagBuilder
+ {
+ /* (non-Javadoc)
+ * @see org.crosswire.jsword.book.filter.gbf.TagBuilder#createTag(java.lang.String)
+ */
+ public Tag createTag(final String name)
+ {
+ if (name.equals("CG")) //$NON-NLS-1$
+ {
+ return new TextTag(">"); //$NON-NLS-1$
+ }
+
+ // else "CT"
+ return new TextTag("<"); //$NON-NLS-1$
+ }
+
+ }
+
+ /**
*
*/
static final class FootnoteStartTagBuilder implements TagBuilder
@@ -463,5 +486,8 @@
BUILDERS.put("WH", builder); //$NON-NLS-1$
BUILDERS.put("WG", builder); //$NON-NLS-1$
BUILDERS.put("WT", new StrongsMorphTagBuilder()); //$NON-NLS-1$
+
+ BUILDERS.put("CG", new EscapeTagBuilder()); //$NON-NLS-1$
+ BUILDERS.put("CT", new EscapeTagBuilder()); //$NON-NLS-1$
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/osis/OSISFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/osis/OSISFilter.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/osis/OSISFilter.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -27,6 +27,7 @@
import org.crosswire.common.util.Logger;
import org.crosswire.common.xml.XMLUtil;
+import org.crosswire.jsword.book.Book;
import org.crosswire.jsword.book.DataPolice;
import org.crosswire.jsword.book.OSISUtil;
import org.crosswire.jsword.book.filter.Filter;
@@ -47,16 +48,17 @@
public class OSISFilter implements Filter
{
/* (non-Javadoc)
- * @see org.crosswire.jsword.book.filter.Filter#toOSIS(org.crosswire.jsword.book.filter.BookDataListener, java.lang.String)
+ * @see org.crosswire.jsword.book.filter.Filter#toOSIS(org.crosswire.jsword.book.Book, org.crosswire.jsword.passage.Key, java.lang.String)
*/
- public List toOSIS(Key key, String plain)
+ public List toOSIS(Book book, Key key, String plain)
{
DataPolice.setKey(key);
Element ele = null;
Exception ex = null;
+ String clean = XMLUtil.cleanAllEntities(plain);
try
{
- ele = parse(XMLUtil.cleanAllEntities(plain));
+ ele = parse(clean);
}
catch (JDOMException e)
{
@@ -74,9 +76,9 @@
if (ex != null)
{
- DataPolice.report("Parse failed: " + ex.getMessage() + //$NON-NLS-1$
+ DataPolice.report("Parse " + book.getInitials() + "(" + key.getName() + ") failed: " + ex.getMessage() + //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
"\non: " + plain); //$NON-NLS-1$
- ele = cleanTags(plain);
+ ele = cleanTags(book, key, clean);
}
if (ele == null)
@@ -87,8 +89,24 @@
return ele.removeContent();
}
- private Element cleanTags(String plain)
+ /* (non-Javadoc)
+ * @see java.lang.Object#clone()
+ */
+ public Object clone()
{
+ try
+ {
+ return super.clone();
+ }
+ catch (CloneNotSupportedException e)
+ {
+ assert false : e;
+ }
+ return null;
+ }
+
+ private Element cleanTags(Book book, Key key, String plain)
+ {
// So just try to strip out all XML looking things
String shawn = XMLUtil.cleanAllTags(plain);
Exception ex = null;
@@ -105,7 +123,7 @@
ex = e;
}
- log.warn("Could not fix it by cleaning tags: " + ex.getMessage()); //$NON-NLS-1$
+ log.warn("Could not fix " + book.getInitials() + "(" + key.getName() + ") by cleaning tags: " + ex.getMessage()); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
return null;
}
@@ -119,24 +137,14 @@
// create a root element to house our document fragment
StringReader in = new StringReader("<div>" + plain + "</div>"); //$NON-NLS-1$ //$NON-NLS-2$
InputSource is = new InputSource(in);
-
+ SAXBuilder builder = new SAXBuilder();
Document doc = builder.build(is);
Element div = doc.getRootElement();
- // data is the div we added above so the input was a well formed
- // XML so we need to add the content of the div and not the div
- // itself
-
-// List data = div.removeContent();
return div;
}
/**
- * The JDOM parser
- */
- private SAXBuilder builder = new SAXBuilder();
-
- /**
* The log stream
*/
private static final Logger log = Logger.getLogger(OSISFilter.class);
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/plaintext/PlainTextFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/plaintext/PlainTextFilter.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/plaintext/PlainTextFilter.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -24,6 +24,7 @@
import java.util.List;
import org.crosswire.common.util.StringUtil;
+import org.crosswire.jsword.book.Book;
import org.crosswire.jsword.book.OSISUtil;
import org.crosswire.jsword.book.filter.Filter;
import org.crosswire.jsword.passage.Key;
@@ -42,9 +43,9 @@
public class PlainTextFilter implements Filter
{
/* (non-Javadoc)
- * @see org.crosswire.jsword.book.filter.Filter#toOSIS(org.crosswire.jsword.book.filter.BookDataListener, java.lang.String)
+ * @see org.crosswire.jsword.book.filter.Filter#toOSIS(org.crosswire.jsword.book.Book, org.crosswire.jsword.passage.Key, java.lang.String)
*/
- public List toOSIS(Key key, String plain)
+ public List toOSIS(Book book, Key key, String plain)
{
OSISUtil.OSISFactory factory = OSISUtil.factory();
Element ele = factory.createDiv();
@@ -65,4 +66,20 @@
return ele.removeContent();
}
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#clone()
+ */
+ public Object clone()
+ {
+ try
+ {
+ return super.clone();
+ }
+ catch (CloneNotSupportedException e)
+ {
+ assert false : e;
+ }
+ return null;
+ }
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ATag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ATag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ATag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -51,7 +51,11 @@
Element reference = OSISUtil.factory().createReference();
// LATER(joe): put the correct reference here
- ele.addContent(reference);
+ if (ele != null)
+ {
+ ele.addContent(reference);
+ }
+
return reference;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/AnonymousTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/AnonymousTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/AnonymousTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -56,7 +56,12 @@
{
Element seg = OSISUtil.factory().createSeg();
seg.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.TYPE_X_PREFIX + getTagName());
- ele.addContent(seg);
+
+ if (ele != null)
+ {
+ ele.addContent(seg);
+ }
+
return seg;
}
/**
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/BTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/BTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/BTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -50,7 +50,12 @@
{
Element hi = OSISUtil.factory().createHI();
hi.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_BOLD);
- ele.addContent(hi);
+
+ if (ele != null)
+ {
+ ele.addContent(hi);
+ }
+
return hi;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/BigTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/BigTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/BigTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -50,7 +50,11 @@
{
Element hiEle = OSISUtil.factory().createHI();
hiEle.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_X_BIG);
- ele.addContent(hiEle);
+ if (ele != null)
+ {
+ ele.addContent(hiEle);
+ }
+
return hiEle;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/BlockquoteTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/BlockquoteTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/BlockquoteTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -49,8 +49,13 @@
public Element processTag(Element ele, Attributes attrs)
{
Element q = OSISUtil.factory().createQ();
- ele.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.Q_BLOCK);
- ele.addContent(q);
+ q.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.Q_BLOCK);
+
+ if (ele != null)
+ {
+ ele.addContent(q);
+ }
+
return q;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/BrTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/BrTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/BrTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -49,7 +49,12 @@
public Element processTag(Element ele, Attributes attrs)
{
Element lb = OSISUtil.factory().createLB();
- ele.addContent(lb);
+
+ if (ele != null)
+ {
+ ele.addContent(lb);
+ }
+
return lb;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/CenterTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/CenterTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/CenterTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -50,7 +50,12 @@
{
Element seg = OSISUtil.factory().createSeg();
seg.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.SEG_CENTER);
- ele.addContent(seg);
+
+ if (ele != null)
+ {
+ ele.addContent(seg);
+ }
+
return seg;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/CitationTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/CitationTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/CitationTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -49,8 +49,13 @@
public Element processTag(Element ele, Attributes attrs)
{
Element q = OSISUtil.factory().createQ();
- ele.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.Q_BLOCK);
- ele.addContent(q);
+ q.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.Q_BLOCK);
+
+ if (ele != null)
+ {
+ ele.addContent(q);
+ }
+
return q;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ColTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ColTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ColTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -49,7 +49,12 @@
public Element processTag(Element ele, Attributes attrs)
{
Element hi = OSISUtil.factory().createCell();
- ele.addContent(hi);
+
+ if (ele != null)
+ {
+ ele.addContent(hi);
+ }
+
return hi;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/CustomHandler.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/CustomHandler.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/CustomHandler.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -27,7 +27,9 @@
import java.util.Map;
import org.crosswire.common.util.Logger;
+import org.crosswire.jsword.book.Book;
import org.crosswire.jsword.book.DataPolice;
+import org.crosswire.jsword.passage.Key;
import org.jdom.Content;
import org.jdom.Element;
import org.jdom.Text;
@@ -51,8 +53,10 @@
/**
* Simple ctor
*/
- public CustomHandler()
+ public CustomHandler(Book book, Key key)
{
+ this.book = book;
+ this.key = key;
stack = new LinkedList();
}
@@ -68,18 +72,27 @@
// then the stack is empty
if (stack.size() > 0)
{
- ele = (Element) stack.getFirst();
- // If the element and its descendants are to be ignored
- // then there is a null element on the stack
- if (ele == null)
+ Object top = stack.getFirst();
+
+ if (top instanceof Element) // It might be a text element
{
- return;
+ ele = (Element) stack.getFirst();
+
+ // If the element and its descendants are to be ignored
+ // then there is a null element on the stack
+ if (ele == null)
+ {
+ return;
+ }
}
}
Tag t = getTag(localname, qname);
- stack.addFirst(t.processTag(ele, attrs));
+ if (t != null)
+ {
+ stack.addFirst(t.processTag(ele, attrs));
+ }
}
/* (non-Javadoc)
@@ -88,6 +101,15 @@
/* @Override */
public void characters(char[] data, int offset, int length)
{
+ // what we are adding
+ String text = new String(data, offset, length);
+
+ if (stack.isEmpty())
+ {
+ stack.addFirst(new Text(text));
+ return;
+ }
+
// What we are adding to
Element current = (Element) stack.getFirst();
@@ -100,8 +122,6 @@
int size = current.getContentSize();
- // what we are adding
- String text = new String(data, offset, length);
// If the last element in the list is a string then we should add
// this string on to the end of it rather than add a new list item
// because (probably as an atrifact of the HTML/XSL transform we get
@@ -126,12 +146,20 @@
/* @Override */
public void endElement(String uri, String localname, String qname)
{
+ if (stack.isEmpty())
+ {
+ return;
+ }
// When we are done processing an element we need to remove
// it from the stack so that nothing more is attached to it.
Element finished = (Element) stack.removeFirst();
Tag t = getTag(localname, qname);
- t.processContent(finished);
+ if (t != null)
+ {
+ t.processContent(finished);
+ }
+
// If it was the last element then it was the root element
// so save it
if (stack.size() == 0)
@@ -158,16 +186,26 @@
if (t == null)
{
- log.warn("unknown thml element: " + localname + " qname=" + qname); //$NON-NLS-1$ //$NON-NLS-2$
+ log.warn("In " + book.getInitials() + "(" + key.getName() + ") unknown thml element: " + localname + " qname=" + qname); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
return t;
}
- DataPolice.report("Wrong case used in thml element: " + qname); //$NON-NLS-1$
+ DataPolice.report("In " + book.getInitials() + "(" + key.getName() + ") Wrong case used in thml element: " + qname); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
}
return t;
}
/**
+ * The book containing the data.
+ */
+ private Book book;
+
+ /**
+ * The key for the data.
+ */
+ private Key key;
+
+ /**
* When the document is parsed,
* this is the last element popped off the stack.
*/
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/DivTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/DivTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/DivTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -59,12 +59,22 @@
{
seg.setAttribute(OSISUtil.OSIS_ATTR_SUBTYPE, OSISUtil.VARIANT_CLASS + classAttr);
}
- ele.addContent(seg);
+
+ if (ele != null)
+ {
+ ele.addContent(seg);
+ }
+
return seg;
}
Element div = OSISUtil.factory().createDiv();
- ele.addContent(div);
+
+ if (ele != null)
+ {
+ ele.addContent(div);
+ }
+
return div;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/FontTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/FontTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/FontTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -80,7 +80,11 @@
XMLUtil.debugSAXAttributes(attrs);
}
- ele.addContent(seg);
+ if (ele != null)
+ {
+ ele.addContent(seg);
+ }
+
return seg;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ForeignTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ForeignTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ForeignTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -23,6 +23,7 @@
import org.crosswire.jsword.book.OSISUtil;
import org.jdom.Element;
+import org.jdom.Namespace;
import org.xml.sax.Attributes;
/**
@@ -53,10 +54,15 @@
String lang = attrs.getValue("lang"); //$NON-NLS-1$
if (lang != null)
{
- div.setAttribute(OSISUtil.OSIS_ATTR_LANG, lang);
+ // OSIS defines the lang attribute as the one from the xml namespace
+ div.setAttribute(OSISUtil.OSIS_ATTR_LANG, lang, Namespace.XML_NAMESPACE);
}
- ele.addContent(div);
+ if (ele != null)
+ {
+ ele.addContent(div);
+ }
+
return div;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ITag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ITag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ITag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -50,7 +50,12 @@
{
Element hi = OSISUtil.factory().createHI();
hi.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_ITALIC);
- ele.addContent(hi);
+
+ if (ele != null)
+ {
+ ele.addContent(hi);
+ }
+
return hi;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ImgTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ImgTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ImgTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -50,7 +50,12 @@
{
Element img = OSISUtil.factory().createFigure();
img.setAttribute(OSISUtil.ATTRIBUTE_FIGURE_SRC, attrs.getValue("src")); //$NON-NLS-1$
- ele.addContent(img);
+
+ if (ele != null)
+ {
+ ele.addContent(img);
+ }
+
return img;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/LiTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/LiTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/LiTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -49,7 +49,12 @@
public Element processTag(Element ele, Attributes attrs)
{
Element item = OSISUtil.factory().createItem();
- ele.addContent(item);
+
+ if (ele != null)
+ {
+ ele.addContent(item);
+ }
+
return item;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/NameTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/NameTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/NameTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -49,7 +49,12 @@
public Element processTag(Element ele, Attributes attrs)
{
Element seg = OSISUtil.factory().createSeg();
- ele.addContent(seg);
+
+ if (ele != null)
+ {
+ ele.addContent(seg);
+ }
+
return seg;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/NoteTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/NoteTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/NoteTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -50,7 +50,12 @@
{
Element note = OSISUtil.factory().createNote();
note.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.NOTETYPE_STUDY);
- ele.addContent(note);
+
+ if (ele != null)
+ {
+ ele.addContent(note);
+ }
+
return note;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/OlTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/OlTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/OlTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -49,8 +49,13 @@
public Element processTag(Element ele, Attributes attrs)
{
Element list = OSISUtil.factory().createList();
- ele.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.LIST_ORDERED);
- ele.addContent(list);
+ list.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.LIST_ORDERED);
+
+ if (ele != null)
+ {
+ ele.addContent(list);
+ }
+
return list;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/PTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/PTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/PTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -49,7 +49,12 @@
public Element processTag(Element ele, Attributes attrs)
{
Element p = OSISUtil.factory().createP();
- ele.addContent(p);
+
+ if (ele != null)
+ {
+ ele.addContent(p);
+ }
+
return p;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/RowTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/RowTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/RowTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -49,7 +49,12 @@
public Element processTag(Element ele, Attributes attrs)
{
Element hi = OSISUtil.factory().createRow();
- ele.addContent(hi);
+
+ if (ele != null)
+ {
+ ele.addContent(hi);
+ }
+
return hi;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ScripRefTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ScripRefTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ScripRefTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -58,17 +58,20 @@
String refstr = attrs.getValue("passage"); //$NON-NLS-1$
if (refstr != null)
{
- reference = OSISUtil.factory().createReference();
+ Passage ref = null;
try
{
- Passage ref = (Passage) keyf.getKey(refstr);
- String osisname = ref.getOsisRef();
- reference.setAttribute(OSISUtil.OSIS_ATTR_REF, osisname);
+ ref = (Passage) keyf.getKey(refstr);
}
catch (NoSuchKeyException ex)
{
- DataPolice.report("Unparsable passage:" + refstr + " due to " + ex.getMessage()); //$NON-NLS-1$ //$NON-NLS-2$
+ DataPolice.report("Unparsable passage: (" + refstr + ") due to " + ex.getMessage()); //$NON-NLS-1$ //$NON-NLS-2$
}
+
+ // If we don't have a Passage then use the original string
+ String osisname = ref != null ? ref.getOsisRef() : refstr;
+ reference = OSISUtil.factory().createReference();
+ reference.setAttribute(OSISUtil.OSIS_ATTR_REF, osisname);
}
else
{
@@ -76,7 +79,10 @@
reference = OSISUtil.factory().createReference();
}
- ele.addContent(reference);
+ if (ele != null)
+ {
+ ele.addContent(reference);
+ }
return reference;
}
@@ -90,13 +96,16 @@
String refstr = ele.getValue();
try
{
- Passage ref = (Passage) keyf.getKey(refstr);
- String osisname = ref.getOsisRef();
- ele.setAttribute(OSISUtil.OSIS_ATTR_REF, osisname);
+ if (ele.getAttribute(OSISUtil.OSIS_ATTR_REF) == null)
+ {
+ Passage ref = (Passage) keyf.getKey(refstr);
+ String osisname = ref.getOsisRef();
+ ele.setAttribute(OSISUtil.OSIS_ATTR_REF, osisname);
+ }
}
catch (NoSuchKeyException ex)
{
- DataPolice.report("Unparsable passage:" + refstr + " due to " + ex.getMessage()); //$NON-NLS-1$ //$NON-NLS-2$
+ DataPolice.report("scripRef ahs no passage attribute, unable to guess: (" + refstr + ") due to " + ex.getMessage()); //$NON-NLS-1$ //$NON-NLS-2$
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ScriptureTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ScriptureTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ScriptureTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -49,7 +49,12 @@
public Element processTag(Element ele, Attributes attrs)
{
Element p = OSISUtil.factory().createP();
- ele.addContent(p);
+
+ if (ele != null)
+ {
+ ele.addContent(p);
+ }
+
return p;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/SmallTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/SmallTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/SmallTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -50,7 +50,12 @@
{
Element hi = OSISUtil.factory().createHI();
hi.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_X_SMALL);
- ele.addContent(hi);
+
+ if (ele != null)
+ {
+ ele.addContent(hi);
+ }
+
return hi;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/SubTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/SubTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/SubTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -50,7 +50,12 @@
{
Element hi = OSISUtil.factory().createHI();
hi.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_SUB);
- ele.addContent(hi);
+
+ if (ele != null)
+ {
+ ele.addContent(hi);
+ }
+
return hi;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/SupTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/SupTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/SupTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -50,7 +50,12 @@
{
Element hi = OSISUtil.factory().createHI();
hi.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_SUPER);
- ele.addContent(hi);
+
+ if (ele != null)
+ {
+ ele.addContent(hi);
+ }
+
return hi;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/SyncTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/SyncTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/SyncTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -139,7 +139,12 @@
{
Element div = OSISUtil.factory().createDiv();
div.setAttribute(OSISUtil.OSIS_ATTR_OSISID, "dict://" + value); //$NON-NLS-1$
- ele.addContent(div);
+
+ if (ele != null)
+ {
+ ele.addContent(div);
+ }
+
return div;
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/THMLFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/THMLFilter.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/THMLFilter.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -31,13 +31,16 @@
import org.crosswire.common.util.Logger;
import org.crosswire.common.xml.XMLUtil;
+import org.crosswire.jsword.book.Book;
import org.crosswire.jsword.book.DataPolice;
import org.crosswire.jsword.book.OSISUtil;
import org.crosswire.jsword.book.filter.Filter;
+import org.crosswire.jsword.book.filter.FilterException;
import org.crosswire.jsword.passage.Key;
import org.jdom.Element;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
/**
* Filter to convert THML to OSIS format.
@@ -55,59 +58,108 @@
public class THMLFilter implements Filter
{
/* (non-Javadoc)
- * @see org.crosswire.jsword.book.filter.Filter#toOSIS(org.crosswire.jsword.book.filter.BookDataListener, java.lang.String)
+ * @see org.crosswire.jsword.book.filter.Filter#toOSIS(org.crosswire.jsword.book.Book, org.crosswire.jsword.passage.Key, java.lang.String)
*/
- public List toOSIS(Key key, String plain)
+ public List toOSIS(Book book, Key key, String plain) throws FilterException
{
DataPolice.setKey(key);
- Element ele = null;
- Exception ex = null;
- try
+ Element ele = cleanParse(book, key, plain);
+ DataPolice.setKey(null);
+
+ if (ele == null)
{
- ele = parse(XMLUtil.cleanAllEntities(plain));
+ if (error instanceof SAXParseException)
+ {
+ int colNumber = ((SAXParseException) error).getColumnNumber();
+ int start = Math.max(0, colNumber - 40);
+ int stop = Math.min(finalInput.length(), colNumber + 40);
+ int here = stop - start;
+ log.warn("Could not fix " + book.getInitials() + '(' + key.getName() + ") by " + //$NON-NLS-1$ //$NON-NLS-2$
+ errorMessage + ": Error here(" + colNumber + ',' + finalInput.length() +',' + here +"): " + finalInput.substring(start, stop)); //$NON-NLS-1$ //$NON-NLS-2$
+ }
+ else
+ {
+ log.warn("Could not fix " + book.getInitials() + "(" + key.getName() + ") by " + errorMessage + ": " + error.getMessage()); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
+ }
+ ele = OSISUtil.factory().createP();
}
- catch (SAXException e)
+
+ return ele.removeContent();
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#clone()
+ */
+ public Object clone()
+ {
+ try
{
- ex = e;
+ return super.clone();
}
- catch (IOException e)
+ catch (CloneNotSupportedException e)
{
- ex = e;
+ assert false : e;
}
- catch (ParserConfigurationException e)
- {
- ex = e;
- }
- finally
- {
- // Make sure that other places don't report this problem
- DataPolice.setKey(null);
- }
+ return null;
+ }
- if (ex != null)
+ private Element cleanParse(Book book, Key key, String plain)
+ {
+ // First try parsing after cleaning up the entities
+ String clean = XMLUtil.cleanAllEntities(plain);
+ Element ele = parse(book, key, clean, "cleaning entities"); //$NON-NLS-1$
+
+ if (ele == null)
{
- DataPolice.report("Parse failed: " + ex.getMessage() + //$NON-NLS-1$
- "\non: " + plain); //$NON-NLS-1$
- ele = cleanTags(plain);
+ ele = cleanText(book, key, clean);
}
+ return ele;
+ }
+
+ private Element cleanText(Book book, Key key, String plain)
+ {
+ // Next try parsing after cleaning up the characters in the text
+ String clean = XMLUtil.cleanAllCharacters(plain);
+ Element ele = parse(book, key, clean, "cleaning text"); //$NON-NLS-1$
+
if (ele == null)
{
- ele = OSISUtil.factory().createP();
+ ele = cleanTags(book, key, clean);
}
- return ele.removeContent();
+ return ele;
}
- private Element cleanTags(String plain)
+ private Element cleanTags(Book book, Key key, String plain)
{
// So just try to strip out all XML looking things
- String shawn = XMLUtil.cleanAllTags(plain);
+ String clean = XMLUtil.cleanAllTags(plain);
+ return parse(book, key, clean, "cleaning tags"); //$NON-NLS-1$
+ }
+
+ private Element parse(Book book, Key key, String plain, String failMessage)
+ {
Exception ex = null;
+ // We need to create a root element to house our document fragment
+ StringBuffer buf = new StringBuffer(15 + plain.length()); // 15 for the tags we add
+ buf.append('<').append(RootTag.TAG_ROOT).append('>').append(plain).append("</").append(RootTag.TAG_ROOT).append('>'); //$NON-NLS-1$
+ finalInput = buf.toString();
try
{
- return parse(shawn);
+ StringReader in = new StringReader(finalInput);
+ InputSource is = new InputSource(in);
+ SAXParserFactory spf = SAXParserFactory.newInstance();
+ SAXParser parser = spf.newSAXParser();
+ CustomHandler handler = new CustomHandler(book, key);
+
+ parser.parse(is, handler);
+ return handler.getRootElement();
}
+ catch (SAXParseException e)
+ {
+ ex = e;
+ }
catch (SAXException e)
{
ex = e;
@@ -121,33 +173,16 @@
ex = e;
}
- log.warn("Could not fix it by cleaning tags: " + ex.getMessage()); //$NON-NLS-1$
-
+ errorMessage = failMessage;
+ error = ex;
return null;
}
- /**
- * Parse a string by creating a StringReader and all the other gubbins.
- */
- private Element parse(String toparse) throws ParserConfigurationException, SAXException, IOException
- {
- // We need to create a root element to house our document fragment
- StringReader in = new StringReader("<" + RootTag.TAG_ROOT + ">" + toparse + "</" + RootTag.TAG_ROOT + ">"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
- InputSource is = new InputSource(in);
+ private String errorMessage;
+ private Exception error;
+ private String finalInput;
- SAXParser parser = spf.newSAXParser();
- CustomHandler handler = new CustomHandler();
-
- parser.parse(is, handler);
- return handler.getRootElement();
- }
-
/**
- * The SAX parser factory
- */
- private SAXParserFactory spf = SAXParserFactory.newInstance();
-
- /**
* The log stream
*/
private static final Logger log = Logger.getLogger(THMLFilter.class);
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TableTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TableTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TableTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -49,7 +49,12 @@
public Element processTag(Element ele, Attributes attrs)
{
Element table = OSISUtil.factory().createTable();
- ele.addContent(table);
+
+ if (ele != null)
+ {
+ ele.addContent(table);
+ }
+
return table;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TdTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TdTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TdTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -49,7 +49,12 @@
public Element processTag(Element ele, Attributes attrs)
{
Element cell = OSISUtil.factory().createCell();
- ele.addContent(cell);
+
+ if (ele != null)
+ {
+ ele.addContent(cell);
+ }
+
return cell;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TermTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TermTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TermTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -50,7 +50,12 @@
{
// A term in a definition.
Element name = OSISUtil.factory().createName();
- ele.addContent(name);
+
+ if (ele != null)
+ {
+ ele.addContent(name);
+ }
+
return name;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ThTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ThTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/ThTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -49,8 +49,13 @@
public Element processTag(Element ele, Attributes attrs)
{
Element cell = OSISUtil.factory().createCell();
- ele.addContent(cell);
+ if (ele != null)
+ {
+ ele.addContent(cell);
+ }
+
+
Element hi = OSISUtil.factory().createHI();
hi.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_BOLD);
cell.addContent(hi);
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TrTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TrTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TrTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -49,7 +49,12 @@
public Element processTag(Element ele, Attributes attrs)
{
Element row = OSISUtil.factory().createRow();
- ele.addContent(row);
+
+ if (ele != null)
+ {
+ ele.addContent(row);
+ }
+
return row;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TtTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TtTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/TtTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -50,7 +50,12 @@
{
Element hi = OSISUtil.factory().createHI();
hi.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_X_TT);
- ele.addContent(hi);
+
+ if (ele != null)
+ {
+ ele.addContent(hi);
+ }
+
return hi;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/UTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/UTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/UTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -50,7 +50,12 @@
{
Element hi = OSISUtil.factory().createHI();
hi.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.HI_UNDERLINE);
- ele.addContent(hi);
+
+ if (ele != null)
+ {
+ ele.addContent(hi);
+ }
+
return hi;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/UlTag.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/UlTag.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/filter/thml/UlTag.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -49,8 +49,13 @@
public Element processTag(Element ele, Attributes attrs)
{
Element list = OSISUtil.factory().createList();
- ele.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.LIST_UNORDERED);
- ele.addContent(list);
+ list.setAttribute(OSISUtil.OSIS_ATTR_TYPE, OSISUtil.LIST_UNORDERED);
+
+ if (ele != null)
+ {
+ ele.addContent(list);
+ }
+
return list;
}
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/ConfigEntryTable.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/ConfigEntryTable.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/ConfigEntryTable.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -596,20 +596,22 @@
String newDir = dir == null ? (String) ConfigEntryType.DIRECTION.getDefault() : dir;
String langEntry = (String) getValue(ConfigEntryType.LANG);
+ String langFromEntry = (String) getValue(ConfigEntryType.GLOSSARY_FROM);
+ String langToEntry = (String) getValue(ConfigEntryType.GLOSSARY_TO);
+
+ // The LANG field should match the GLOSSARY_FROM field
+ if (langFromEntry != null && !langFromEntry.equals(langEntry))
+ {
+ langEntry = langFromEntry;
+ }
+
String lang = AbstractBookMetaData.getLanguage(internal, langEntry);
add(ConfigEntryType.LANGUAGE, lang);
// This returns Left to Right if
// it does not know what it is.
- boolean leftToRight = true;
- if (langEntry != null)
- {
- leftToRight = isLeftToRight(langEntry);
- }
+ boolean leftToRight = isLeftToRight(langEntry);
- String langFromEntry = (String) getValue(ConfigEntryType.GLOSSARY_FROM);
- String langToEntry = (String) getValue(ConfigEntryType.GLOSSARY_TO);
-
if (langFromEntry != null || langToEntry != null)
{
String langFrom = AbstractBookMetaData.getLanguage(internal, langFromEntry);
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/RawBackend.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/RawBackend.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/RawBackend.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -169,9 +169,9 @@
int start = SwordUtil.decodeLittleEndian32(read, 0);
int size = SwordUtil.decodeLittleEndian16(read, 4);
- if (size < 1)
+ if (size < 0)
{
- log.error("Verse " + verse.getName() + " has a bad index size of " + size); //$NON-NLS-1$ //$NON-NLS-2$
+ log.error("In " + getBookMetaData().getInitials() + ": Verse " + verse.getName() + " has a bad index size of " + size); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
}
// Read from the data file.
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/RawLDBackend.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/RawLDBackend.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/RawLDBackend.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -197,7 +197,8 @@
keytitle = keytitle.substring(0, keytitle.length() - 1);
}
- if (isDailyDevotional)
+ // Massage keytitle if it can be.
+ if (isDailyDevotional && keytitle.length() >= 3)
{
String[] parts = StringUtil.splitAll(keytitle, '.');
greg.set(Calendar.MONTH, Integer.parseInt(parts[0]) - 1);
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/SwordDictionary.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/SwordDictionary.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/SwordDictionary.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -133,7 +133,7 @@
String txt = backend.getRawText(key);
- List osisContent = sbmd.getFilter().toOSIS(key, txt);
+ List osisContent = sbmd.getFilter().toOSIS(this, key, txt);
div.addContent(osisContent);
return new BookData(osis, this, key);
Modified: trunk/jsword/src/test/java/JSwordAllTests.java
===================================================================
--- trunk/jsword/src/test/java/JSwordAllTests.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/test/java/JSwordAllTests.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -60,6 +60,7 @@
suite.addTestSuite(org.crosswire.jsword.book.BooksTest.class);
suite.addTestSuite(org.crosswire.jsword.book.BookMetaDataTest.class);
suite.addTestSuite(org.crosswire.jsword.book.SentanceUtilTest.class);
+ // run independently: suite.addTestSuite(org.crosswire.jsword.book.ReadEverything.class);
// commented out because the tests were very poor.
//suite.addTestSuite(org.crosswire.jsword.book.OsisTest.class);
Modified: trunk/jsword/src/test/java/org/crosswire/jsword/book/ReadEverything.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/book/ReadEverything.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/book/ReadEverything.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -24,12 +24,17 @@
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
+import java.util.Locale;
+import java.util.ResourceBundle;
+import org.crosswire.common.config.ChoiceFactory;
import org.crosswire.common.config.Config;
+import org.crosswire.common.util.CWClassLoader;
import org.crosswire.common.util.Logger;
import org.crosswire.common.util.ResourceUtil;
import org.crosswire.common.xml.XMLUtil;
import org.crosswire.jsword.passage.Key;
+import org.crosswire.jsword.util.Project;
import org.jdom.Document;
import org.jdom.JDOMException;
@@ -54,34 +59,53 @@
*/
public static void main(String[] args) throws IOException, JDOMException
{
- Logger.outputInfoMinimum();
+ Logger.outputEverything();
+ // Calling Project.instance() will set up the project's home directory
+ // ~/.jsword
+ // This will set it as a place to look for overrides for
+ // ResourceBundles, properties and other resources
+ Project.instance();
+
+ // And the array of allowed osis>html converters
+ ChoiceFactory.getDataMap().put("converters", new String[] {}); //$NON-NLS-1$
+
+ // The choice of configurable XSL stylesheets
+ ChoiceFactory.getDataMap().put("cswing-styles", new String[] {}); //$NON-NLS-1$
+
// Load the desktop configuration so we can find the sword drivers
Config config = new Config("Desktop Options"); //$NON-NLS-1$
Document xmlconfig = XMLUtil.getDocument("config"); //$NON-NLS-1$
- config.add(xmlconfig, null);
+
+ Locale defaultLocale = Locale.getDefault();
+ ResourceBundle configResources = ResourceBundle.getBundle("config", defaultLocale, CWClassLoader.instance(ReadEverything.class)); //$NON-NLS-1$
+
+ config.add(xmlconfig, configResources);
+
config.setProperties(ResourceUtil.getProperties("desktop")); //$NON-NLS-1$
config.localToApplication();
// Loop through all the Books
- log.info("*** Reading all known Books"); //$NON-NLS-1$
+ log.warn("*** Reading all known Books"); //$NON-NLS-1$
List comments = Books.installed().getBooks();
for (Iterator cit = comments.iterator(); cit.hasNext();)
{
Book book = (Book) cit.next();
+ log.warn("****** Reading: " + book.getInitials()); //$NON-NLS-1$
+
Key set = book.getGlobalKeyList();
- testReadMultiple(book.getBookMetaData(), book, set);
+ testReadMultiple(book, set);
}
}
/**
* Perform a test read on an iterator over a set of keys
*/
- private static void testReadMultiple(BookMetaData bmd, Book book, Key set)
+ private static void testReadMultiple(Book book, Key set)
{
- DataPolice.setBook(bmd);
+ DataPolice.setBook(book.getBookMetaData());
//log.info("Testing: "+bmd.getInitials()+"="+bmd.getFullName());
long start = System.currentTimeMillis();
@@ -90,15 +114,7 @@
Iterator it = set.iterator();
while (it.hasNext())
{
- Key subset = (Key) it.next();
- if (subset.canHaveChildren())
- {
- testReadSingle(bmd, book, subset);
- }
- else
- {
- testReadSingle(bmd, book, subset);
- }
+ testReadSingle(book, (Key) it.next());
entries++;
}
@@ -106,22 +122,22 @@
long end = System.currentTimeMillis();
float time = (end - start) / 1000F;
- log.info("Tested: book="+bmd.getInitials()+" entries="+entries+" time="+time+"s ("+(1000*time/entries)+"ms per entry)"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$
+ log.info("Tested: book="+book.getInitials()+" entries="+entries+" time="+time+"s ("+(1000*time/entries)+"ms per entry)"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$
}
/**
* Perform a test read on a single key
*/
- private static void testReadSingle(BookMetaData bmd, Book book, Key key)
+ private static void testReadSingle(Book book, Key key)
{
try
{
//log.debug("reading: "+bmd.getInitials()+"/"+key.getText());
BookData data = book.getData(key);
- if (data.getPlainText() == null)
+ if (data.getOsis() == null)
{
- log.warn("No output from: "+bmd.getInitials()+", "+key.getName()); //$NON-NLS-1$ //$NON-NLS-2$
+ log.warn("No output from: "+book.getInitials()+", "+key.getName()); //$NON-NLS-1$ //$NON-NLS-2$
}
// This might be a useful extra test, except that a failure gives you no help at all.
@@ -130,16 +146,16 @@
/*
catch (ValidationException ex)
{
- log.warn("Validation error reading: "+bmd.getInitials()+", "+key.getText()+", code:"+ex.getErrorCode()+" reason: "+ex.getMessage());
+ log.warn("Validation error reading: "+book.getInitials()+", "+key.getText()+", code:"+ex.getErrorCode()+" reason: "+ex.getMessage());
}
*/
catch (BookException ex)
{
- log.warn("Failed to read: "+bmd.getInitials()+", "+key.getName()+", reason: "+ex.getMessage(), ex); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
+ log.warn("Failed to read: "+book.getInitials()+", "+key.getName()+", reason: "+ex.getMessage(), ex); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
}
catch (Throwable ex)
{
- log.error("Unexpected error reading: "+bmd.getInitials()+", "+key.getName(), ex); //$NON-NLS-1$ //$NON-NLS-2$
+ log.error("Unexpected error reading: "+book.getInitials()+", "+key.getName(), ex); //$NON-NLS-1$ //$NON-NLS-2$
}
}
Modified: trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/stub/StubDictionary.java
===================================================================
--- trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/stub/StubDictionary.java 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/stub/StubDictionary.java 2006-11-13 13:32:18 UTC (rev 1185)
@@ -71,7 +71,7 @@
div.addContent(title);
text.addContent(div);
- List osisContent = FilterFactory.getDefaultFilter().toOSIS(key, "stub implementation"); //$NON-NLS-1$
+ List osisContent = FilterFactory.getDefaultFilter().toOSIS(this, key, "stub implementation"); //$NON-NLS-1$
div.addContent(osisContent);
BookData bdata = new BookData(osis, this, key);
Modified: trunk/jsword-web/src/web/change.html
===================================================================
--- trunk/jsword-web/src/web/change.html 2006-11-13 12:31:46 UTC (rev 1184)
+++ trunk/jsword-web/src/web/change.html 2006-11-13 13:32:18 UTC (rev 1185)
@@ -13,7 +13,7 @@
<p>The following is the broad outline of the version of JSword,
BibleDesktop and Project-B (the old name for JSword).</p>
<ul>
- <li><strong>Version 1.0.4</strong> - BABEL (FIFTH) UPDATE - Many small, but significant changes.
+ <li><strong>Version 1.0.5</strong> - BABEL (FIFTH) UPDATE - Many small, but significant changes.
<ul>
<li>Bible book names in over 30 languages are now supported with English as the default.
<table>
@@ -33,6 +33,7 @@
<li>Fixed a bug preventing the Clarke Module from showing.</li>
<li>Fixed a bug preventing locked dictionaries from showing correctly</li>
<li>Fixed potential bugs pointed out by the QA tools FindBugs and PMD</li>
+ <li>Fixed miscellaneous bugs found by a scan of all the modules.</li>
</ul>
</li>
<li><strong>Version 1.0.4</strong> - FOURTH UPDATE -
More information about the jsword-svn
mailing list