// // XhtmlTeConverter.java // GoBibleCreator // // Created by Larry Waswick on Feb 19 2010, based on OsisConverter by Jolon Faichney. // For the glory of our Lord Jesus Christ and the furtherance of His Kingdom. // This file is placed into the public domain. // import java.io.*; import java.util.*; import java.util.jar.*; import jolon.xml.*; public class XhtmlTeConverter extends XMLConverter { public XhtmlTeConverter (//File collectionsFile, XMLObject xml) { super(xml); } /** Tag to identify Xhtml-TE XML data. **/ public final static String XHTML_TAG = "html"; /** XhtmlTE tag that contains all of the testaments. **/ public final static String XHTML_BODY_TAG = "body"; /** XhtmlTE tag that contains a testament (New Testament, Old Testament, Aprocrypha, etc). **/ public final static String TESTAMENT_TAG[] = {"div"}; // class="testament" /** XhtmlTE tag that contains a book (Psalms, Mark, etc). **/ public final static String BOOK_TAG = "div"; // class="scrBook" /** OSIS attribute within the BOOK_TAG that contains the book name. **/ public final static String BOOK_NAME_ATTRIBUTE = "title"; /** OSIS attribute within the BOOK_TAG that contains the short book name. It is assumed that the contents of this attribute will be Latin and will be converted to US-ASCII. **/ public final static String BOOK_SHORT_NAME_ATTRIBUTE = "title"; /** OSIS tag that contains one chapter. **/ public final static String CHAPTER_TAG_CHAPTER = "div"; // class="chapter" public final static String CHAPTER_TAG_ATTRIBUTE = "chapter"; /** OSIS attribute within the CHAPTER_TAG that contains the chapter number (eg "Gen.1"). **/ public final static String CHAPTER_NUMBER_ATTRIBUTE = "title"; /** Verse data is contained within the verse tag. **/ public final static String VERSE_TAG = "div"; /** XHTML span tag. **/ public final static String SPAN_TAG = "span"; /** XHTML class attribute. **/ public final static String CLASS_ATTRIBUTE = "class"; // class attributes in XHTML-TE that can be mapped to markup values in Go Bible // (class names are lower case here) public final static String CLASS_WORDS_OF_CHRIST = "words_of_christ"; public final static String CLASS_TRANSLATOR_ADDITION = "supplied"; public final static String CLASS_NAME_DEITY = "name_of_god"; public final static String CLASS_QUOTED_TEXT = "quoted_text"; public final static String CLASS_PROPER_NAME = "pn"; // Go Bible markup values public final static char CODE_RED_LETTER = 1; public final static char CODE_TRANSLATOR_ADDITION = 2; public final static char CODE_NAME_DEITY = 3; public final static char CODE_QUOTED_TEXT = 4; public final static char CODE_PROPER_NAME = 5; public HashMap parse(XMLObject xhtml) { // Call the GoBibleCreator convert method with the // XHTML specific tags // Extract the books from the XHTML indexed by book name return parseBooks(xhtml, XHTML_BODY_TAG, TESTAMENT_TAG, BOOK_TAG, BOOK_NAME_ATTRIBUTE, null); } /** * XHTML-TE chapters are inside a "div" tag. **/ public boolean isChapter(XMLObject xml) { String tag = xml.getTag(); String tempString = xml.getAttribute("class", "none"); Boolean isChapter = tag.equals(CHAPTER_TAG_CHAPTER) && tempString.equals(CHAPTER_TAG_ATTRIBUTE); return isChapter; } /** * OSIS chapter titles are assumed to end with a period then the chapter number. **/ public int getChapterNumber(XMLObject xml) { String chapterNumber = xml.getAttribute(CHAPTER_NUMBER_ATTRIBUTE, "*Error no " + CHAPTER_NUMBER_ATTRIBUTE + " attribute*"); return Integer.parseInt(chapterNumber); } public void parseChapter(XMLObject xml, Chapter chapter) { //int verse = 1; // Find each verse for (Enumeration e = xml.getChildren(); e.hasMoreElements(); ) { XMLObject xmlVerse = (XMLObject) e.nextElement(); //System.out.println("Tag: " + xml.getTag()); // See if it is a verse tag if (xmlVerse.getTag().equals(VERSE_TAG)) { //System.out.println("Parsing verse " + verse++); // Extract verse data and add verse String verseString = extractVerseCDATA(xmlVerse); int indexOfAmp = verseString.indexOf('&'); // Convert HTML ampersand characters if the verse data contains one. if (indexOfAmp >= 0) { verseString = convertAmpersands(verseString); } chapter.verses.addElement(verseString); chapter.allVerses.append(verseString); } } } /** * TE verse-specific version of XMLConverter.extractCDATA. Pulls out verse data, but also includes markup * formatting. See http://www.crosswire.org/wiki/Projects:Go_Bible/SymScroll#Go_Bible_Creator_support. * @param xml verse div element to parse * @return String formatted verse text */ public static String extractVerseCDATA(XMLObject xml) { StringBuilder result = new StringBuilder(); for (Enumeration e = xml.getChildren(); e.hasMoreElements(); ) { XMLObject child = (XMLObject) e.nextElement(); if (child instanceof CDATA) { // Add the CDATA to the result result.append(child.getTag()); } // Don't include CDATA within the sup, reference, or title tags else if (!child.getTag().equals("sup") && !child.getTag().equals("reference") && !child.getTag().equals("title")) { if (child.getTag().equals(SPAN_TAG)) { // object is a span - check to see if it has a class that maps to Go Bible markup if (xml.getAttribute(CLASS_ATTRIBUTE, "").equalsIgnoreCase(CLASS_WORDS_OF_CHRIST)) { result.append(CODE_RED_LETTER); result.append(extractVerseCDATA(child)); result.append(CODE_RED_LETTER); } else if (xml.getAttribute(CLASS_ATTRIBUTE, "").equalsIgnoreCase(CLASS_TRANSLATOR_ADDITION)) { result.append(CODE_TRANSLATOR_ADDITION); result.append(extractVerseCDATA(child)); result.append(CODE_TRANSLATOR_ADDITION); } else if (xml.getAttribute(CLASS_ATTRIBUTE, "").equalsIgnoreCase(CLASS_NAME_DEITY)) { result.append(CODE_NAME_DEITY); result.append(extractVerseCDATA(child)); result.append(CODE_NAME_DEITY); } else if (xml.getAttribute(CLASS_ATTRIBUTE, "").equalsIgnoreCase(CLASS_PROPER_NAME)) { result.append(CODE_PROPER_NAME); result.append(extractVerseCDATA(child)); result.append(CODE_PROPER_NAME); } else if (xml.getAttribute(CLASS_ATTRIBUTE, "").equalsIgnoreCase(CLASS_QUOTED_TEXT)) { result.append(CODE_QUOTED_TEXT); result.append(extractVerseCDATA(child)); result.append(CODE_QUOTED_TEXT); } else { // no markup - just extract the text result.append(extractVerseCDATA(child)); } } else { // The child itself may contain CDATA so check it result.append(extractVerseCDATA(child)); } } } return result.toString(); } }