[jsword-svn] r1376 - in trunk: bibledesktop/src/main/java/org/crosswire/bibledesktop/display/basic bibledesktop/src/main/java/org/crosswire/bibledesktop/passage common/src/main/java/org/crosswire/common/diff jsword/src/main/java/org/crosswire/jsword/book jsword/src/main/java/org/crosswire/jsword/book/study jsword/src/main/java/org/crosswire/jsword/index jsword/src/main/java/org/crosswire/jsword/index/lucene jsword-limbo/src/main/java/org/crosswire/jsword/book
dmsmith at www.crosswire.org
dmsmith at www.crosswire.org
Fri Jun 1 11:27:02 MST 2007
Author: dmsmith
Date: 2007-06-01 11:27:01 -0700 (Fri, 01 Jun 2007)
New Revision: 1376
Added:
trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.properties
trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java
Modified:
trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/display/basic/SplitBookDataDisplay.java
trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/passage/KeyTreeModel.java
trunk/common/src/main/java/org/crosswire/common/diff/Distance.java
trunk/common/src/main/java/org/crosswire/common/diff/package.html
trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/Openness.java
trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/Strongs.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.properties
trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsMapSet.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/package.html
Log:
More work on indexing additional fields.
Start of a Strong's Number study tool.
Modified: trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/display/basic/SplitBookDataDisplay.java
===================================================================
--- trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/display/basic/SplitBookDataDisplay.java 2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/display/basic/SplitBookDataDisplay.java 2007-06-01 18:27:01 UTC (rev 1376)
@@ -17,7 +17,7 @@
* Copyright: 2005
* The copyright to this program is held by it's authors.
*
- * ID: $Id$
+ * ID: $Id:SplitBookDataDisplay.java 1369 2007-06-01 13:35:27Z dmsmith $
*/
package org.crosswire.bibledesktop.display.basic;
Modified: trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/passage/KeyTreeModel.java
===================================================================
--- trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/passage/KeyTreeModel.java 2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/passage/KeyTreeModel.java 2007-06-01 18:27:01 UTC (rev 1376)
@@ -17,7 +17,7 @@
* Copyright: 2005
* The copyright to this program is held by it's authors.
*
- * ID: $Id$
+ * ID: $Id:KeyTreeModel.java 1253 2007-03-16 21:16:26Z dmsmith $
*/
package org.crosswire.bibledesktop.passage;
Modified: trunk/common/src/main/java/org/crosswire/common/diff/Distance.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/diff/Distance.java 2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/common/src/main/java/org/crosswire/common/diff/Distance.java 2007-06-01 18:27:01 UTC (rev 1376)
@@ -17,7 +17,7 @@
* Copyright: 2007
* The copyright to this program is held by it's authors.
*
- * ID: $Id: org.eclipse.jdt.ui.prefs 1178 2006-11-06 12:48:02Z dmsmith $
+ * ID: $Id$
*/
package org.crosswire.common.diff;
Property changes on: trunk/common/src/main/java/org/crosswire/common/diff/Distance.java
___________________________________________________________________
Name: svn:kewords
- Author Date Id Revision
Name: svn:keywords
+ Author Date Id Revision
Property changes on: trunk/common/src/main/java/org/crosswire/common/diff/package.html
___________________________________________________________________
Name: svn:keywords
+ Author Date Id Revision
Name: svn:eol-style
+ native
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.java 2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.java 2007-06-01 18:27:01 UTC (rev 1376)
@@ -45,28 +45,12 @@
static final Msg MISSING_VERSE = new Msg("OSISUtil.MissingVerse"); //$NON-NLS-1$
static final Msg OSIS_BADID = new Msg("OSISUtil.OSISBadID"); //$NON-NLS-1$
- static final Msg OPEN_UNKNOWN = new Msg("Openness.Unknown"); //$NON-NLS-1$
- static final Msg OPEN_PD = new Msg("Openness.PD"); //$NON-NLS-1$
- static final Msg OPEN_FREE = new Msg("Openness.Free"); //$NON-NLS-1$
- static final Msg OPEN_COPYABLE = new Msg("Openness.Copyable"); //$NON-NLS-1$
- static final Msg OPEN_COMMERCIAL = new Msg("Openness.Commercial"); //$NON-NLS-1$
-
static final Msg BOOK_METADATA_SET_OTHER = new Msg("BookSet.Other"); //$NON-NLS-1$
static final Msg STRONGS_GREEK = new Msg("Strongs.Greek"); //$NON-NLS-1$
static final Msg STRONGS_HEBREW = new Msg("Strongs.Hebrew"); //$NON-NLS-1$
static final Msg STRONGS_PARSING = new Msg("Strongs.Parsing"); //$NON-NLS-1$
- static final Msg STRONGS_ERROR_PARSE = new Msg("Strongs.ErrorParse"); //$NON-NLS-1$
- static final Msg STRONGS_ERROR_NUMBER = new Msg("Strongs.ErrorNumber"); //$NON-NLS-1$
- static final Msg STRONGS_ERROR_HEBREW = new Msg("Strongs.ErrorHebrew"); //$NON-NLS-1$
- static final Msg STRONGS_ERROR_GREEK = new Msg("Strongs.ErrorGreek"); //$NON-NLS-1$
- static final Msg STRONGS_ERROR_PARSING = new Msg("Strongs.ErrorParsing"); //$NON-NLS-1$
- static final Msg STRONGS_ERROR_TYPE = new Msg("Strongs.ErrorType"); //$NON-NLS-1$
-
- static final Msg ERROR_MIXED = new Msg("SentenceUtil.ErrorMixed"); //$NON-NLS-1$
- static final Msg ERROR_BADCASE = new Msg("SentenceUtil.ErrorBadcase"); //$NON-NLS-1$
-
/**
* Passthrough ctor
*/
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.properties
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.properties 2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.properties 2007-06-01 18:27:01 UTC (rev 1376)
@@ -18,24 +18,7 @@
OSISUtil.MissingVerse=Verse element could not be found
OSISUtil.OSISBadID=OsisID not valid: {0}
-Openness.Unknown=Unknown
-Openness.PD=Public Domain
-Openness.Free=Free
-Openness.Copyable=Copyable
-Openness.Commercial=Commercial
-
BookSet.Other=Other
-Strongs.Greek=Greek:
-Strongs.Hebrew=Hebrew:
-Strongs.Parsing=Parsing:
-
-Strongs.ErrorParse=Strongs number must be of the form <n>, <0n> or (n) where n is a number. Given ''{0}''
-Strongs.ErrorNumber=Could not get a number from ''{0}''
-Strongs.ErrorHebrew=Hebrew numbers must be between 0 and {0,number,integer}. Given {1,number,integer}
-Strongs.ErrorGreek=Greek numbers must be between 0 and {0,number,integer}. Given {1,number,integer}
-Strongs.ErrorParsing=Parsing numbers must be greater than 0. Given {0,number,integer}
-Strongs.ErrorType=Strongs numbers must have a type in the range, 0-2. Given {0,number,integer}
-
SentenceUtil.ErrorMixed=MIXED case should only exist with LORD''s
SentenceUtil.ErrorBadcase=Case must be 0-3
Added: trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.java 2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,44 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2005
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.book.study;
+
+import org.crosswire.common.util.MsgBase;
+
+/**
+ * Compile safe Msg resource settings.
+ *
+ * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's authors.
+ * @author Joe Walker [joe at eireneh dot com]
+ */
+final class Msg extends MsgBase
+{
+ static final Msg STRONGS_ERROR_NUMBER = new Msg("Strongs.ErrorNumber"); //$NON-NLS-1$
+
+ /**
+ * Passthrough ctor
+ */
+ private Msg(String name)
+ {
+ super(name);
+ }
+}
Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.java
___________________________________________________________________
Name: svn:keywords
+ Author Date Id Revision
Name: svn:eol-style
+ native
Added: trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.properties
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.properties (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.properties 2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,8 @@
+# The naming convention for the keys in the file is ClassName.MessageName
+# Where ClassName is the name of the class using the property.
+# When the resource is used by more than one class it should be the one
+# with which the resource is most closely associated.
+# The MessageName should be mixed case, with a leading capital.
+# It should have no spaces or other punctuation (e.g. _, -, ', ...)
+
+Strongs.ErrorNumber=Not a valid Strong''s Number "{0}"
Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.properties
___________________________________________________________________
Name: svn:keywords
+ Author Date Id Revision
Name: svn:eol-style
+ native
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsMapSet.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsMapSet.java 2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsMapSet.java 2007-06-01 18:27:01 UTC (rev 1376)
@@ -35,7 +35,6 @@
*
* @see gnu.lgpl.License for license details.
* The copyright to this program is held by it's authors.
- * @author Joe Walker [joe at eireneh dot com]
* @author DM Smith [dmsmith555 at yahoo dot com]
*/
public class StrongsMapSet
@@ -63,7 +62,7 @@
reps = new TreeSet();
map.put(strongsNumber, reps);
}
- reps.add(representation);
+ reps.add(representation.toLowerCase());
}
/**
Added: trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java 2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,189 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.book.study;
+
+import java.text.DecimalFormat;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.crosswire.jsword.book.BookException;
+
+/**
+ * A Strong's Number is either Greek or Hebrew, where the actual numbers for each start at 1.
+ * This class can parse Strong's Numbers that begin with G, g, H or h and are immediately
+ * followed by a number. That number can have leading 0's. It can be followed by an OSISref
+ * extension of !a, !b, which is ignored.
+ *
+ * <p>The canonical representation of the number is a G or H followed by 4 digits,
+ * with leading 0's as needed.</p>
+ *
+ * <p>Numbers that exist:<ul>
+ * <li>Hebrew: 1-8674
+ * <li>Greek: 1-5624 (but not 1418, 2717, 3203-3302, 4452)
+ * </ul>
+ * </p>
+ *
+ * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class StrongsNumber
+{
+ /**
+ * Build an immutable Strong's Number.
+ * Anything that does not match causes a BookException.
+ * @param input a string that needs to be parsed.
+ * @throws BookException
+ */
+ public StrongsNumber(String input) throws BookException
+ {
+ parse(input);
+ validate();
+ }
+
+ /**
+ * Build an immutable Strong's Number.
+ * If the language is not 'G' or 'H' or the number is invalid, a BookException is thrown.
+ * @param language
+ * @param strongsNumber
+ * @throws BookException
+ */
+ public StrongsNumber(char language, short strongsNumber) throws BookException
+ {
+ this.language = language;
+ this.strongsNumber = strongsNumber;
+ validate();
+ }
+
+ /**
+ * Return the canonical form of a Strong's Number.
+ * @return the strongsNumber
+ */
+ public String getStrongsNumber()
+ {
+ StringBuffer buf = new StringBuffer(5);
+ buf.append(language);
+ buf.append(ZERO_PAD.format(strongsNumber));
+ return buf.toString();
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#hashCode()
+ */
+ public int hashCode()
+ {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + language;
+ result = prime * result + strongsNumber;
+ return result;
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#equals(java.lang.Object)
+ */
+ public boolean equals(Object obj)
+ {
+ if (this == obj)
+ {
+ return true;
+ }
+
+ if (obj == null || getClass() != obj.getClass())
+ {
+ return false;
+ }
+
+ final StrongsNumber other = (StrongsNumber) obj;
+
+ return language == other.language && strongsNumber == other.strongsNumber;
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#toString()
+ */
+ public String toString()
+ {
+ return getStrongsNumber();
+ }
+
+ /**
+ * Do the actual parsing. Anything that does not match causes a BookException.
+ * @param input
+ * @throws BookException
+ */
+ private void parse(String input) throws BookException
+ {
+ String text = input;
+
+ // Does it match
+ Matcher m = STRONGS_PATTERN.matcher(text);
+ if (!m.lookingAt())
+ {
+ throw new BookException(Msg.STRONGS_ERROR_NUMBER, new Object[] { input });
+ }
+
+ String lang = m.group(1);
+ language = lang.charAt(0);
+ switch (language)
+ {
+ case 'g':
+ language = 'G';
+ break;
+ case 'h':
+ language = 'H';
+ break;
+ }
+
+ // Get the number after the G or H
+ strongsNumber = Short.parseShort(m.group(2));
+ }
+
+ private void validate() throws BookException
+ {
+ if (language != 'G' && language != 'H')
+ {
+ throw new BookException(Msg.STRONGS_ERROR_NUMBER, new Object[] { toString() });
+ }
+
+ if (strongsNumber < 1)
+ {
+ throw new BookException(Msg.STRONGS_ERROR_NUMBER, new Object[] { toString() });
+ }
+ }
+
+ /**
+ * Whether it is Greek (G) or Hebrew (H).
+ */
+ private char language;
+
+ /**
+ * The Strong's Number.
+ */
+ private short strongsNumber;
+
+ /**
+ * The pattern of an acceptable strongs number.
+ */
+ private static final Pattern STRONGS_PATTERN = Pattern.compile("([GgHh])([0-9]+)"); //$NON-NLS-1$
+ private static final DecimalFormat ZERO_PAD = new DecimalFormat("0000"); //$NON-NLS-1$
+}
Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java
___________________________________________________________________
Name: svn:keywords
+ Author Date Id Revision
Name: svn:eol-style
+ native
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java 2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,46 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.KeywordTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * A specialized analyzer that normalizes JSword keys.
+ *
+ * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class KeyAnalyzer extends Analyzer
+{
+ /* (non-Javadoc)
+ * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
+ */
+ public TokenStream tokenStream(String fieldName, Reader reader)
+ {
+ return new KeyFilter(new KeywordTokenizer(reader));
+ }
+}
Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java
___________________________________________________________________
Name: svn:keywords
+ Author Date Id Revision
Name: svn:eol-style
+ native
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java 2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,55 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * A KeyFilter normalizes Keys.
+ *
+ * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class KeyFilter extends TokenFilter
+{
+ /**
+ * Construct filtering <i>in</i>.
+ */
+ public KeyFilter(TokenStream in)
+ {
+ super(in);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.lucene.analysis.TokenStream#next()
+ */
+ public final Token next() throws IOException
+ {
+ // TODO(DMS): actually normalize
+ return input.next();
+ }
+}
Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java
___________________________________________________________________
Name: svn:keywords
+ Author Date Id Revision
Name: svn:eol-style
+ native
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java 2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java 2007-06-01 18:27:01 UTC (rev 1376)
@@ -24,10 +24,9 @@
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
/**
* A specialized analyzer for Books that analyzes different fields differently.
@@ -41,33 +40,23 @@
public LuceneAnalyzer()
{
+ // The default analysis
+ analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
+
+ // Keywords are normalized to osisIDs
+ analyzer.addAnalyzer(LuceneIndex.FIELD_KEY, new KeyAnalyzer());
+
+ // Strong's Numbers are normalized to a consistent representation
+ analyzer.addAnalyzer(LuceneIndex.FIELD_STRONG, new StrongsNumberAnalyzer());
+
+ // XRefs are normalized from ranges into a list of osisIDs
+ analyzer.addAnalyzer(LuceneIndex.FIELD_XREF, new XRefAnalyzer());
}
public TokenStream tokenStream(String fieldName, Reader reader)
{
- // do not tokenize keys
- if (LuceneIndex.FIELD_KEY.equals(fieldName))
- {
- return KEYWORD.tokenStream(fieldName, reader);
- }
- // Split Strong's Numbers on whitespace
- else if (LuceneIndex.FIELD_STRONG.equals(fieldName))
- {
- return WHITESPACE.tokenStream(fieldName, reader);
- }
- // Split xrefs's on whitespace
- else if (LuceneIndex.FIELD_XREF.equals(fieldName))
- {
- return WHITESPACE.tokenStream(fieldName, reader);
- }
- // just use the standard tokenizer
- else
- {
- return SIMPLE.tokenStream(fieldName, reader);
- }
+ return analyzer.tokenStream(fieldName, reader);
}
- private static final Analyzer KEYWORD = new KeywordAnalyzer();
- private static final Analyzer WHITESPACE = new WhitespaceAnalyzer();
- private static final Analyzer SIMPLE = new SimpleAnalyzer();
+ private PerFieldAnalyzerWrapper analyzer;
}
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java 2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,46 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+
+/**
+ * A specialized analyzer that normalizes Strong's Numbers.
+ *
+ * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class StrongsNumberAnalyzer extends Analyzer
+{
+ /* (non-Javadoc)
+ * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
+ */
+ public TokenStream tokenStream(String fieldName, Reader reader)
+ {
+ return new StrongsNumberFilter(new WhitespaceTokenizer(reader));
+ }
+}
Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java
___________________________________________________________________
Name: svn:keywords
+ Author Date Id Revision
Name: svn:eol-style
+ native
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java 2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,74 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.crosswire.jsword.book.BookException;
+import org.crosswire.jsword.book.study.StrongsNumber;
+
+/**
+ * A StrongsNumberFilter normalizes Strong's Numbers.
+ *
+ * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class StrongsNumberFilter extends TokenFilter
+{
+ /**
+ * Construct filtering <i>in</i>.
+ */
+ public StrongsNumberFilter(TokenStream in)
+ {
+ super(in);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.lucene.analysis.TokenStream#next()
+ */
+ public final Token next() throws IOException
+ {
+ Token token = input.next();
+ if (token == null)
+ {
+ return null;
+ }
+
+ try
+ {
+ String s = new StrongsNumber(token.termText()).getStrongsNumber();
+ if (!s.equals(token.termText()))
+ {
+ token.setTermText(s);
+ }
+ return token;
+ }
+ catch (BookException e)
+ {
+ throw new IOException(e.getDetailedMessage());
+ }
+ }
+}
Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java
___________________________________________________________________
Name: svn:keywords
+ Author Date Id Revision
Name: svn:eol-style
+ native
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java 2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,46 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+
+/**
+ * A specialized analyzer that normalizes cross-references (XRefs).
+ *
+ * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class XRefAnalyzer extends Analyzer
+{
+ /* (non-Javadoc)
+ * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
+ */
+ public TokenStream tokenStream(String fieldName, Reader reader)
+ {
+ return new KeyFilter(new WhitespaceTokenizer(reader));
+ }
+}
Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java
___________________________________________________________________
Name: svn:keywords
+ Author Date Id Revision
Name: svn:eol-style
+ native
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java 2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,55 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by its authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * An XRefFilter normalizes OSISrefs (not yet implemented; tokens currently pass through unchanged).
+ *
+ * @see gnu.lgpl.License for license details.
+ * The copyright to this program is held by its authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class XRefFilter extends TokenFilter
+{
+ /**
+ * Construct filtering <i>in</i>.
+ */
+ public XRefFilter(TokenStream in)
+ {
+ super(in);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.lucene.analysis.TokenStream#next()
+ */
+ public final Token next() throws IOException
+ {
+ // TODO(DMS): actually normalize
+ return input.next();
+ }
+}
Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java
___________________________________________________________________
Name: svn:keywords
+ Author Date Id Revision
Name: svn:eol-style
+ native
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/package.html
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/package.html 2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/package.html 2007-06-01 18:27:01 UTC (rev 1376)
@@ -1,9 +1,9 @@
-<html>
-<body>
-
-<p>
- Abstraction of an Index.
-</p>
-
-</body>
-</html>
+<html>
+<body>
+
+<p>
+ Abstraction of an Index.
+</p>
+
+</body>
+</html>
Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/index/package.html
___________________________________________________________________
Name: svn:keywords
+ Author Date Id Revision
Name: svn:eol-style
+ native
Modified: trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/Openness.java
===================================================================
--- trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/Openness.java 2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/Openness.java 2007-06-01 18:27:01 UTC (rev 1376)
@@ -38,27 +38,27 @@
/**
* If the data is of unknown distribution status
*/
- public static final Openness UNKNOWN = new Openness(Msg.OPEN_UNKNOWN);
+ public static final Openness UNKNOWN = new Openness(LimboMsg.OPEN_UNKNOWN);
/**
* If the data is free of copyright restrictions
*/
- public static final Openness PD = new Openness(Msg.OPEN_PD);
+ public static final Openness PD = new Openness(LimboMsg.OPEN_PD);
/**
* Does the data have a license that permits free use
*/
- public static final Openness FREE = new Openness(Msg.OPEN_FREE);
+ public static final Openness FREE = new Openness(LimboMsg.OPEN_FREE);
/**
* Is the data freely redistributable
*/
- public static final Openness COPYABLE = new Openness(Msg.OPEN_COPYABLE);
+ public static final Openness COPYABLE = new Openness(LimboMsg.OPEN_COPYABLE);
/**
* Is the data sold for commercial profit
*/
- public static final Openness COMMERCIAL = new Openness(Msg.OPEN_COMMERCIAL);
+ public static final Openness COMMERCIAL = new Openness(LimboMsg.OPEN_COMMERCIAL);
/**
* Prevent anyone else from doing this
Modified: trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/Strongs.java
===================================================================
--- trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/Strongs.java 2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/Strongs.java 2007-06-01 18:27:01 UTC (rev 1376)
@@ -58,7 +58,7 @@
// It's a Greek or Hebrew number
if (desc.charAt(desc.length() - 1) != '>')
{
- throw new BookException(Msg.STRONGS_ERROR_PARSE, new Object[] { desc });
+ throw new BookException(LimboMsg.STRONGS_ERROR_PARSE, new Object[] { desc });
}
if (desc.charAt(1) == '0')
@@ -75,17 +75,17 @@
// It's a parsing number
if (desc.charAt(desc.length() - 1) != ')')
{
- throw new BookException(Msg.STRONGS_ERROR_PARSE, new Object[] { desc });
+ throw new BookException(LimboMsg.STRONGS_ERROR_PARSE, new Object[] { desc });
}
set(PARSING, Integer.parseInt(desc.substring(1, desc.length() - 1)));
}
- throw new BookException(Msg.STRONGS_ERROR_PARSE, new Object[] { desc });
+ throw new BookException(LimboMsg.STRONGS_ERROR_PARSE, new Object[] { desc });
}
catch (NumberFormatException ex)
{
- throw new BookException(Msg.STRONGS_ERROR_NUMBER, new Object[] { desc });
+ throw new BookException(LimboMsg.STRONGS_ERROR_NUMBER, new Object[] { desc });
}
}
@@ -165,11 +165,11 @@
switch (type)
{
case GREEK:
- return Msg.STRONGS_GREEK.toString() + number;
+ return LimboMsg.STRONGS_GREEK.toString() + number;
case HEBREW:
- return Msg.STRONGS_HEBREW.toString() + number;
+ return LimboMsg.STRONGS_HEBREW.toString() + number;
case PARSING:
- return Msg.STRONGS_PARSING.toString() + number;
+ return LimboMsg.STRONGS_PARSING.toString() + number;
default:
assert false : type;
return "!Error!"; //$NON-NLS-1$
@@ -247,14 +247,14 @@
case HEBREW:
if (number > HEBREW_MAX || number < 1)
{
- throw new BookException(Msg.STRONGS_ERROR_HEBREW, new Object[] { new Integer(HEBREW_MAX), new Integer(number) });
+ throw new BookException(LimboMsg.STRONGS_ERROR_HEBREW, new Object[] { new Integer(HEBREW_MAX), new Integer(number) });
}
break;
case GREEK:
if (number > GREEK_MAX || number < 1)
{
- throw new BookException(Msg.STRONGS_ERROR_GREEK, new Object[] { new Integer(GREEK_MAX), new Integer(number) });
+ throw new BookException(LimboMsg.STRONGS_ERROR_GREEK, new Object[] { new Integer(GREEK_MAX), new Integer(number) });
}
// We have not checked for 1418, 2717, 3203-3302, 4452 which do not appear to
// be legal numbers for Greek words. Should we do this?
@@ -263,14 +263,14 @@
case PARSING:
if (number < 1)
{
- throw new BookException(Msg.STRONGS_ERROR_PARSING, new Object[] { new Integer(number) });
+ throw new BookException(LimboMsg.STRONGS_ERROR_PARSING, new Object[] { new Integer(number) });
}
// The correct range seems to be: 0, 5625-5773, 8675-8809, but not 5626, 5653, 5687, 5767, 8679
// I'm not sure if this is 100% correct so I'll not check it at the mo.
break;
default:
- throw new BookException(Msg.STRONGS_ERROR_TYPE, new Object[] { new Integer(number) });
+ throw new BookException(LimboMsg.STRONGS_ERROR_TYPE, new Object[] { new Integer(number) });
}
}
More information about the jsword-svn
mailing list