[jsword-svn] r1323 - in trunk: bibledesktop/src/main/java/org/crosswire/bibledesktop/desktop common/src/main/java/org/crosswire/common/util jsword/src/main/java/org/crosswire/jsword/book jsword/src/main/java/org/crosswire/jsword/book/basic jsword/src/main/java/org/crosswire/jsword/book/sword jsword/src/main/java/org/crosswire/jsword/index/lucene
dmsmith at www.crosswire.org
dmsmith at www.crosswire.org
Wed May 16 07:08:12 MST 2007
Author: dmsmith
Date: 2007-05-16 07:08:12 -0700 (Wed, 16 May 2007)
New Revision: 1323
Modified:
trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/desktop/DesktopActions.java
trunk/common/src/main/java/org/crosswire/common/util/Languages.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/BookData.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/basic/DefaultBookMetaData.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/ConfigEntryTable.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/SwordBookMetaData.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java
Log:
Added the ability to index notes and titles (aka headings).
Cleaned up checkstyle complaints.
Modified: trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/desktop/DesktopActions.java
===================================================================
--- trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/desktop/DesktopActions.java 2007-05-15 14:06:45 UTC (rev 1322)
+++ trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/desktop/DesktopActions.java 2007-05-16 14:08:12 UTC (rev 1323)
@@ -593,7 +593,8 @@
/**
* Indicates whether there is MacOSX integration.
*/
- boolean osxRegistered;
+ private boolean osxRegistered;
+
/**
* The About window
*/
Modified: trunk/common/src/main/java/org/crosswire/common/util/Languages.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/util/Languages.java 2007-05-15 14:06:45 UTC (rev 1322)
+++ trunk/common/src/main/java/org/crosswire/common/util/Languages.java 2007-05-16 14:08:12 UTC (rev 1323)
@@ -17,7 +17,7 @@
* Copyright: 2005
* The copyright to this program is held by it's authors.
*
- * ID: $Id: AbstractBookMetaData.java 1311 2007-05-03 19:36:51Z dmsmith $
+ * ID: $Id$
*/
package org.crosswire.common.util;
@@ -27,7 +27,7 @@
/**
- * An implementaion of the Propery Change methods from BookMetaData.
+ * A utility class that converts ISO-639 codes or locales to their "friendly" language name.
*
* @see gnu.lgpl.License for license details.
* The copyright to this program is held by it's authors.
@@ -36,33 +36,83 @@
public class Languages
{
/**
+ * Make the class a true utility class by having a private constructor.
+ */
+ private Languages()
+ {
+ }
+
+ /**
+ * Determine whether the language code is valid.
+ * The code is valid if it is null or empty.
+ * The code is valid if it is in iso639.properties.
+ * If a locale is used for the iso639Code, it will use the part before the '_'.
+ * Thus, this code does not support dialects, except as found in the iso639.
+ *
+ * @param iso639Code
+ * @return true if the language is valid.
+ */
+ public static boolean isValidLanguage(String iso639Code)
+ {
+ String lookup = iso639Code;
+ if (lookup == null || lookup.length() == 0)
+ {
+ return true;
+ }
+
+ if (lookup.indexOf('_') != -1)
+ {
+ String[] locale = StringUtil.split(lookup, '_');
+ return isValidLanguage(locale[0]);
+ }
+
+ // These are not uncommon. Looking for them prevents exceptions
+ // and provides the same result.
+ if (lookup.startsWith("x-") || lookup.startsWith("X-") || lookup.length() > 3) //$NON-NLS-1$ //$NON-NLS-2$
+ {
+ return false;
+ }
+
+ try
+ {
+ languages.getString(lookup);
+ return true;
+ }
+ catch (MissingResourceException e)
+ {
+ return false;
+ }
+ }
+
+ /**
* Get the language name from the language code.
+ * If the code is null or empty then it is considered to be DEFAULT_LANG_CODE (that is, English).
+ * Otherwise, if the code is not a recognized ISO-639 code, the name for the unknown language code is returned; no log message is generated here.
* If a locale is used for the iso639Code, it will use the part before the '_'.
* Thus, this code does not support dialects, except as found in the iso639.
*
* @param iso639Code
* @return the name of the language
*/
- public static String getLanguage(String ident, String iso639Code)
+ public static String getLanguage(String iso639Code)
{
String lookup = iso639Code;
if (lookup == null || lookup.length() == 0)
{
- return getLanguage(ident, DEFAULT_LANG_CODE);
+ return getLanguage(DEFAULT_LANG_CODE);
}
if (lookup.indexOf('_') != -1)
{
String[] locale = StringUtil.split(lookup, '_');
- return getLanguage(ident, locale[0]);
+ return getLanguage(locale[0]);
}
- // If the language begins w/ an x- then it is "Undetermined"
- // Also if it is not a 2 or 3 character code then it is not a valid
- // iso639 code.
+ // These are not uncommon. Looking for them prevents exceptions
+ // and provides the same result.
if (lookup.startsWith("x-") || lookup.startsWith("X-") || lookup.length() > 3) //$NON-NLS-1$ //$NON-NLS-2$
{
- return getLanguage(ident, UNKNOWN_LANG_CODE);
+ return getLanguage(UNKNOWN_LANG_CODE);
}
try
@@ -71,20 +121,14 @@
}
catch (MissingResourceException e)
{
- log.error("Not a valid language code:" + iso639Code + " in book " + ident); //$NON-NLS-1$ //$NON-NLS-2$
- return getLanguage(ident, UNKNOWN_LANG_CODE);
+ return getLanguage(UNKNOWN_LANG_CODE);
}
}
- /**
- * The log stream
- */
- private static final Logger log = Logger.getLogger(Languages.class);
-
public static final String DEFAULT_LANG_CODE = "en"; //$NON-NLS-1$
private static final String UNKNOWN_LANG_CODE = "und"; //$NON-NLS-1$
- private static/*final*/ResourceBundle languages;
+ private static /*final*/ ResourceBundle languages;
static
{
try
Property changes on: trunk/common/src/main/java/org/crosswire/common/util/Languages.java
___________________________________________________________________
Name: svn:keywords
+ Author Date Id Revision
Name: svn:eol-style
+ native
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/BookData.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/BookData.java 2007-05-15 14:06:45 UTC (rev 1322)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/BookData.java 2007-05-16 14:08:12 UTC (rev 1323)
@@ -106,6 +106,24 @@
}
/**
+ * Return just the notes in the book.
+ * @return The Book's notes
+ */
+ public String getNotes()
+ {
+ return OSISUtil.getNotes(getOsis());
+ }
+
+ /**
+ * Return just the headings, both canonical and non-canonical, in the book.
+ * @return The Book's headings
+ */
+ public String getHeadings()
+ {
+ return OSISUtil.getHeadings(getOsis());
+ }
+
+ /**
* Check that a BibleData is valid.
* Currently, this does nothing, and isn't used. it was broken when we used
* JAXB, however it wasn't much use then becuase JAXB did a lot to keep the
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java 2007-05-15 14:06:45 UTC (rev 1322)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java 2007-05-16 14:08:12 UTC (rev 1323)
@@ -167,6 +167,11 @@
public static final String NOTETYPE_STUDY = "x-StudyNote"; //$NON-NLS-1$
/**
+ * Constant for the cross reference note type
+ */
+ public static final String NOTETYPE_REFERENCE = "crossReference"; //$NON-NLS-1$
+
+ /**
* Constant for the variant type segment
*/
public static final String VARIANT_TYPE = "x-variant"; //$NON-NLS-1$
@@ -257,7 +262,7 @@
public static final String ATTRIBUTE_SPEAKER_WHO = "who"; //$NON-NLS-1$
public static final String ATTRIBUTE_W_MORPH = "morph"; //$NON-NLS-1$
public static final String ATTRIBUTE_OSISTEXT_OSISIDWORK = "osisIDWork"; //$NON-NLS-1$
- // OSIS defines the long attribute as the one from the xml namespace
+ // OSIS defines the lang attribute as the one from the xml namespace
// Typical usage element.setAttribute(OSISUtil.OSIS_ATTR_LANG, lang, Namespace.XML_NAMESPACE);
public static final String OSIS_ATTR_LANG = "lang"; //$NON-NLS-1$
public static final String ATTRIBUTE_DIV_BOOK = "book"; //$NON-NLS-1$
@@ -668,7 +673,7 @@
buffer.append(strongType);
buffer.append(strongsNum);
}
-
+
return buffer.toString().trim();
}
@@ -703,6 +708,48 @@
return collector.getOsisID();
}
+ /**
+ * The text of non-reference notes.
+ *
+ * @return The text of the notes that are not cross references
+ */
+ public static String getNotes(Element root)
+ {
+ StringBuffer buffer = new StringBuffer();
+
+ Iterator contentIter = getDeepContent(root, OSISUtil.OSIS_ELEMENT_NOTE).iterator();
+ while (contentIter.hasNext())
+ {
+ Element ele = (Element) contentIter.next();
+ String attr = ele.getAttributeValue(OSISUtil.OSIS_ATTR_TYPE);
+ if (attr == null || !attr.equals(NOTETYPE_REFERENCE))
+ {
+ buffer.append(OSISUtil.getTextContent(ele));
+ }
+ }
+
+ return buffer.toString();
+ }
+
+ /**
+ * The text of the titles (headings) found under the given element.
+ *
+ * @return The headings in the text
+ */
+ public static String getHeadings(Element root)
+ {
+ StringBuffer buffer = new StringBuffer();
+
+ Iterator contentIter = getDeepContent(root, OSISUtil.OSIS_ELEMENT_TITLE).iterator();
+ while (contentIter.hasNext())
+ {
+ Element ele = (Element) contentIter.next();
+ getCanonicalContent(ele.getName(), null, ele.getContent().iterator(), buffer);
+ }
+
+ return buffer.toString();
+ }
+
private static void getCanonicalContent(String sName, String sID, Iterator iter, StringBuffer buffer)
{
Object data = null;
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/basic/DefaultBookMetaData.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/basic/DefaultBookMetaData.java 2007-05-15 14:06:45 UTC (rev 1322)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/basic/DefaultBookMetaData.java 2007-05-16 14:08:12 UTC (rev 1323)
@@ -124,7 +124,7 @@
*/
public void setLanguage(String language)
{
- putProperty(KEY_LANGUAGE, Languages.getLanguage(initials, language));
+ putProperty(KEY_LANGUAGE, Languages.getLanguage(language));
}
/**
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/ConfigEntryTable.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/ConfigEntryTable.java 2007-05-15 14:06:45 UTC (rev 1322)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/ConfigEntryTable.java 2007-05-16 14:08:12 UTC (rev 1323)
@@ -605,7 +605,7 @@
langEntry = langFromEntry;
}
- String lang = Languages.getLanguage(internal, langEntry);
+ String lang = getLanguage(internal, langEntry);
add(ConfigEntryType.LANGUAGE, lang);
// This returns Left to Right if
@@ -614,9 +614,9 @@
if (langFromEntry != null || langToEntry != null)
{
- String langFrom = Languages.getLanguage(internal, langFromEntry);
+ String langFrom = getLanguage(internal, langFromEntry);
add(ConfigEntryType.LANGUAGE_FROM, langFrom);
- String langTo = Languages.getLanguage(internal, langToEntry);
+ String langTo = getLanguage(internal, langToEntry);
add(ConfigEntryType.LANGUAGE_TO, langTo);
boolean fromLeftToRight = true;
boolean toLeftToRight = true;
@@ -763,6 +763,15 @@
// }
}
+ private String getLanguage(String initials, String iso639Code)
+ {
+ if (!Languages.isValidLanguage(iso639Code))
+ {
+ log.warn("Unknown language " + iso639Code + " in book " + initials); //$NON-NLS-1$ //$NON-NLS-2$
+ }
+ return Languages.getLanguage(iso639Code);
+ }
+
/**
* Build an ordered map so that it displays in a consistent order.
*/
@@ -811,6 +820,7 @@
}
}
}
+
private String report(String issue, String confEntryName, String line)
{
StringBuffer buf = new StringBuffer(100);
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/SwordBookMetaData.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/SwordBookMetaData.java 2007-05-15 14:06:45 UTC (rev 1322)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/SwordBookMetaData.java 2007-05-16 14:08:12 UTC (rev 1323)
@@ -263,7 +263,7 @@
*/
/* @Override */
public boolean hasFeature(FeatureType feature)
- {
+ {
if (cet.match(ConfigEntryType.FEATURE, feature.toString()))
{
return true;
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java 2007-05-15 14:06:45 UTC (rev 1322)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java 2007-05-16 14:08:12 UTC (rev 1323)
@@ -1,3 +1,24 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2005
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id:LuceneIndex.java 984 2006-01-23 14:18:33 -0500 (Mon, 23 Jan 2006) dmsmith $
+ */
package org.crosswire.jsword.index.lucene;
import java.io.Reader;
@@ -8,6 +29,13 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
+/**
+ * A specialized analyzer for Books that analyzes different fields differently.
+ *
+ * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
public class LuceneAnalyzer extends Analyzer
{
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java 2007-05-15 14:06:45 UTC (rev 1322)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java 2007-05-16 14:08:12 UTC (rev 1323)
@@ -69,7 +69,6 @@
* @see gnu.lgpl.License for license details.
* The copyright to this program is held by it's authors.
* @author Joe Walker [joe at eireneh dot com]
-
*/
public class LuceneIndex extends AbstractIndex implements Activatable
{
@@ -341,6 +340,8 @@
{
boolean hasStrongs = book.getBookMetaData().hasFeature(FeatureType.STRONGS_NUMBERS);
boolean hasXRefs = book.getBookMetaData().hasFeature(FeatureType.SCRIPTURE_REFERENCES);
+ boolean hasNotes = book.getBookMetaData().hasFeature(FeatureType.FOOTNOTES);
+ boolean hasHeadings = book.getBookMetaData().hasFeature(FeatureType.HEADINGS);
String oldRootName = ""; //$NON-NLS-1$
int percent = 0;
@@ -348,6 +349,8 @@
String text = ""; //$NON-NLS-1$
String strongs = ""; //$NON-NLS-1$
String xrefs = ""; //$NON-NLS-1$
+ String notes = ""; //$NON-NLS-1$
+ String headings = ""; //$NON-NLS-1$
BookData data = null;
Key subkey = null;
Document doc = null;
@@ -404,6 +407,24 @@
}
}
+ if (hasNotes)
+ {
+ notes = data.getNotes();
+ if (notes != null && notes.length() > 0)
+ {
+ doc.add(new Field(FIELD_NOTE, notes, Field.Store.NO, Field.Index.TOKENIZED));
+ }
+ }
+
+ if (hasHeadings)
+ {
+ headings = data.getHeadings();
+ if (headings != null && headings.length() > 0)
+ {
+ doc.add(new Field(FIELD_HEADING, headings, Field.Store.NO, Field.Index.TOKENIZED));
+ }
+ }
+
writer.addDocument(doc);
}
@@ -464,14 +485,19 @@
protected static final String FIELD_STRONG = "strong"; //$NON-NLS-1$
/**
+ * The Lucene field for headings
+ */
+ protected static final String FIELD_HEADING = "heading"; //$NON-NLS-1$
+
+ /**
* The Lucene field for cross references
*/
protected static final String FIELD_XREF = "xref"; //$NON-NLS-1$
/**
- * The Lucene field for notes
+ * The Lucene field for the notes
*/
- protected static final String FIELD_NOTES = "note"; //$NON-NLS-1$
+ protected static final String FIELD_NOTE = "note"; //$NON-NLS-1$
/**
* The Book that we are indexing
More information about the jsword-svn
mailing list