[jsword-svn] r1376 - in trunk: bibledesktop/src/main/java/org/crosswire/bibledesktop/display/basic bibledesktop/src/main/java/org/crosswire/bibledesktop/passage common/src/main/java/org/crosswire/common/diff jsword/src/main/java/org/crosswire/jsword/book jsword/src/main/java/org/crosswire/jsword/book/study jsword/src/main/java/org/crosswire/jsword/index jsword/src/main/java/org/crosswire/jsword/index/lucene jsword-limbo/src/main/java/org/crosswire/jsword/book

dmsmith at www.crosswire.org dmsmith at www.crosswire.org
Fri Jun 1 11:27:02 MST 2007


Author: dmsmith
Date: 2007-06-01 11:27:01 -0700 (Fri, 01 Jun 2007)
New Revision: 1376

Added:
   trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.java
   trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.properties
   trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java
Modified:
   trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/display/basic/SplitBookDataDisplay.java
   trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/passage/KeyTreeModel.java
   trunk/common/src/main/java/org/crosswire/common/diff/Distance.java
   trunk/common/src/main/java/org/crosswire/common/diff/package.html
   trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/Openness.java
   trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/Strongs.java
   trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.java
   trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.properties
   trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsMapSet.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java
   trunk/jsword/src/main/java/org/crosswire/jsword/index/package.html
Log:
More work on indexing additional fields.
Start of a Strong's Number study tool.

Modified: trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/display/basic/SplitBookDataDisplay.java
===================================================================
--- trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/display/basic/SplitBookDataDisplay.java	2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/display/basic/SplitBookDataDisplay.java	2007-06-01 18:27:01 UTC (rev 1376)
@@ -17,7 +17,7 @@
  * Copyright: 2005
  *     The copyright to this program is held by it's authors.
  *
- * ID: $Id$
+ * ID: $Id:SplitBookDataDisplay.java 1369 2007-06-01 13:35:27Z dmsmith $
  */
 package org.crosswire.bibledesktop.display.basic;
 

Modified: trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/passage/KeyTreeModel.java
===================================================================
--- trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/passage/KeyTreeModel.java	2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/passage/KeyTreeModel.java	2007-06-01 18:27:01 UTC (rev 1376)
@@ -17,7 +17,7 @@
  * Copyright: 2005
  *     The copyright to this program is held by it's authors.
  *
- * ID: $Id$
+ * ID: $Id:KeyTreeModel.java 1253 2007-03-16 21:16:26Z dmsmith $
  */
 package org.crosswire.bibledesktop.passage;
 

Modified: trunk/common/src/main/java/org/crosswire/common/diff/Distance.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/diff/Distance.java	2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/common/src/main/java/org/crosswire/common/diff/Distance.java	2007-06-01 18:27:01 UTC (rev 1376)
@@ -17,7 +17,7 @@
  * Copyright: 2007
  *     The copyright to this program is held by it's authors.
  *
- * ID: $Id: org.eclipse.jdt.ui.prefs 1178 2006-11-06 12:48:02Z dmsmith $
+ * ID: $Id$
  */
 
 package org.crosswire.common.diff;


Property changes on: trunk/common/src/main/java/org/crosswire/common/diff/Distance.java
___________________________________________________________________
Name: svn:kewords
   - Author Date Id Revision
Name: svn:keywords
   + Author Date Id Revision


Property changes on: trunk/common/src/main/java/org/crosswire/common/diff/package.html
___________________________________________________________________
Name: svn:keywords
   + Author Date Id Revision
Name: svn:eol-style
   + native

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.java	2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.java	2007-06-01 18:27:01 UTC (rev 1376)
@@ -45,28 +45,12 @@
     static final Msg MISSING_VERSE = new Msg("OSISUtil.MissingVerse"); //$NON-NLS-1$
     static final Msg OSIS_BADID = new Msg("OSISUtil.OSISBadID"); //$NON-NLS-1$
 
-    static final Msg OPEN_UNKNOWN = new Msg("Openness.Unknown"); //$NON-NLS-1$
-    static final Msg OPEN_PD = new Msg("Openness.PD"); //$NON-NLS-1$
-    static final Msg OPEN_FREE = new Msg("Openness.Free"); //$NON-NLS-1$
-    static final Msg OPEN_COPYABLE = new Msg("Openness.Copyable"); //$NON-NLS-1$
-    static final Msg OPEN_COMMERCIAL = new Msg("Openness.Commercial"); //$NON-NLS-1$
-
     static final Msg BOOK_METADATA_SET_OTHER = new Msg("BookSet.Other"); //$NON-NLS-1$
 
     static final Msg STRONGS_GREEK = new Msg("Strongs.Greek"); //$NON-NLS-1$
     static final Msg STRONGS_HEBREW = new Msg("Strongs.Hebrew"); //$NON-NLS-1$
     static final Msg STRONGS_PARSING = new Msg("Strongs.Parsing"); //$NON-NLS-1$
 
-    static final Msg STRONGS_ERROR_PARSE = new Msg("Strongs.ErrorParse"); //$NON-NLS-1$
-    static final Msg STRONGS_ERROR_NUMBER = new Msg("Strongs.ErrorNumber"); //$NON-NLS-1$
-    static final Msg STRONGS_ERROR_HEBREW = new Msg("Strongs.ErrorHebrew"); //$NON-NLS-1$
-    static final Msg STRONGS_ERROR_GREEK = new Msg("Strongs.ErrorGreek"); //$NON-NLS-1$
-    static final Msg STRONGS_ERROR_PARSING = new Msg("Strongs.ErrorParsing"); //$NON-NLS-1$
-    static final Msg STRONGS_ERROR_TYPE = new Msg("Strongs.ErrorType"); //$NON-NLS-1$
-
-    static final Msg ERROR_MIXED = new Msg("SentenceUtil.ErrorMixed"); //$NON-NLS-1$
-    static final Msg ERROR_BADCASE = new Msg("SentenceUtil.ErrorBadcase"); //$NON-NLS-1$
-
     /**
      * Passthrough ctor
      */

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.properties
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.properties	2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.properties	2007-06-01 18:27:01 UTC (rev 1376)
@@ -18,24 +18,7 @@
 OSISUtil.MissingVerse=Verse element could not be found
 OSISUtil.OSISBadID=OsisID not valid: {0}
 
-Openness.Unknown=Unknown
-Openness.PD=Public Domain
-Openness.Free=Free
-Openness.Copyable=Copyable
-Openness.Commercial=Commercial
-
 BookSet.Other=Other
 
-Strongs.Greek=Greek:
-Strongs.Hebrew=Hebrew:
-Strongs.Parsing=Parsing:
-
-Strongs.ErrorParse=Strongs number must be of the form <n>, <0n> or (n) where n is a number. Given ''{0}''
-Strongs.ErrorNumber=Could not get a number from ''{0}''
-Strongs.ErrorHebrew=Hebrew numbers must be between 0 and {0,number,integer}. Given {1,number,integer}
-Strongs.ErrorGreek=Greek numbers must be between 0 and {0,number,integer}. Given {1,number,integer}
-Strongs.ErrorParsing=Parsing numbers must be greater than 0. Given {0,number,integer}
-Strongs.ErrorType=Strongs numbers must have a type in the range, 0-2. Given {0,number,integer}
-
 SentenceUtil.ErrorMixed=MIXED case should only exist with LORD''s
 SentenceUtil.ErrorBadcase=Case must be 0-3

Added: trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.java	2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,44 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2005
+ *     The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.book.study;
+
+import org.crosswire.common.util.MsgBase;
+
+/**
+ * Compile safe Msg resource settings.
+ * 
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by it's authors.
+ * @author Joe Walker [joe at eireneh dot com]
+ */
+final class Msg extends MsgBase
+{
+    static final Msg STRONGS_ERROR_NUMBER = new Msg("Strongs.ErrorNumber"); //$NON-NLS-1$
+
+    /**
+     * Passthrough ctor
+     */
+    private Msg(String name)
+    {
+        super(name);
+    }
+}


Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.java
___________________________________________________________________
Name: svn:keywords
   + Author Date Id Revision
Name: svn:eol-style
   + native

Added: trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.properties
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.properties	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.properties	2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,8 @@
+# The naming convention for the keys in the file is ClassName.MessageName
+# Where ClassName is the name of the class using the property.
+# When the resource is used by more than one class it should be the one
+# with which the resource is most closely associated.
+# The MessageName should be mixed case, with a leading capital.
+# It should have no spaces or other punctuation (e.g. _, -, ', ...)
+
+Strongs.ErrorNumber=Not a valid Strong''s Number "{0}"


Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/book/study/Msg.properties
___________________________________________________________________
Name: svn:keywords
   + Author Date Id Revision
Name: svn:eol-style
   + native

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsMapSet.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsMapSet.java	2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsMapSet.java	2007-06-01 18:27:01 UTC (rev 1376)
@@ -35,7 +35,6 @@
  * 
  * @see gnu.lgpl.License for license details.
  *      The copyright to this program is held by it's authors.
- * @author Joe Walker [joe at eireneh dot com]
  * @author DM Smith [dmsmith555 at yahoo dot com]
  */
 public class StrongsMapSet
@@ -63,7 +62,7 @@
             reps = new TreeSet();
             map.put(strongsNumber, reps);
         }
-        reps.add(representation);
+        reps.add(representation.toLowerCase());
     }
 
     /**

Added: trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java	2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,189 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ *     The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.book.study;
+
+import java.text.DecimalFormat;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.crosswire.jsword.book.BookException;
+
+/**
+ * A Strong's Number is either Greek or Hebrew, where the actual numbers for each start at 1.
+ * This class can parse Strong's Numbers that begin with G, g, H or h and are immediately
+ * followed by a number. That number can have leading 0's. It can be followed by an OSISref
+ * extension of !a, !b, which is ignored.
+ * 
+ * <p>The canonical representation of the number is a G or H followed by 4 digits,
+ * with leading 0's as needed.</p>
+ * 
+ * <p>Numbers that exist:<ul>
+ * <li>Hebrew: 1-8674
+ * <li>Greek: 1-5624 (but not 1418, 2717, 3203-3302, 4452)
+ * </ul>
+ * </p>
+ * 
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class StrongsNumber
+{
+    /**
+     * Build an immutable Strong's Number.
+     * Anything that does not match causes a BookException.
+     * @param input a string that needs to be parsed.
+     * @throws BookException
+     */
+    public StrongsNumber(String input) throws BookException
+    {
+        parse(input);
+        validate();
+    }
+
+    /**
+     * Build an immutable Strong's Number.
+     * If the language is not 'G' or 'H' or the number is invalid, a BookException is thrown.
+     * @param language
+     * @param strongsNumber
+     * @throws BookException
+     */
+    public StrongsNumber(char language, short strongsNumber) throws BookException
+    {
+        this.language = language;
+        this.strongsNumber = strongsNumber;
+        validate();
+    }
+
+    /**
+     * Return the canonical form of a Strong's Number.
+     * @return the strongsNumber
+     */
+    public String getStrongsNumber()
+    {
+        StringBuffer buf = new StringBuffer(5);
+        buf.append(language);
+        buf.append(ZERO_PAD.format(strongsNumber));
+        return buf.toString();
+    }
+
+    /* (non-Javadoc)
+     * @see java.lang.Object#hashCode()
+     */
+    public int hashCode()
+    {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + language;
+        result = prime * result + strongsNumber;
+        return result;
+    }
+
+    /* (non-Javadoc)
+     * @see java.lang.Object#equals(java.lang.Object)
+     */
+    public boolean equals(Object obj)
+    {
+        if (this == obj)
+        {
+            return true;
+        }
+
+        if (obj == null || getClass() != obj.getClass())
+        {
+            return false;
+        }
+
+        final StrongsNumber other = (StrongsNumber) obj;
+
+        return language == other.language && strongsNumber == other.strongsNumber;
+    }
+
+    /* (non-Javadoc)
+     * @see java.lang.Object#toString()
+     */
+    public String toString()
+    {
+        return getStrongsNumber();
+    }
+
+    /**
+     * Do the actual parsing. Anything that does not match causes a BookException.
+     * @param input
+     * @throws BookException
+     */
+    private void parse(String input) throws BookException
+    {
+        String text = input;
+
+        // Does it match
+        Matcher m = STRONGS_PATTERN.matcher(text);
+        if (!m.lookingAt())
+        {
+            throw new BookException(Msg.STRONGS_ERROR_NUMBER, new Object[] { input });
+        }
+
+        String lang = m.group(1);
+        language = lang.charAt(0);
+        switch (language)
+        {
+            case 'g':
+                language = 'G';
+                break;
+            case 'h':
+                language = 'H';
+                break;
+        }
+
+        // Get the number after the G or H
+        strongsNumber = Short.parseShort(m.group(2));
+    }
+
+    private void validate() throws BookException
+    {
+        if (language != 'G' && language != 'H')
+        {
+            throw new BookException(Msg.STRONGS_ERROR_NUMBER, new Object[] { toString() });
+        }
+
+        if (strongsNumber < 1)
+        {
+            throw new BookException(Msg.STRONGS_ERROR_NUMBER, new Object[] { toString() });
+        }
+    }
+
+    /**
+     * Whether it is Greek (G) or Hebrew (H).
+     */
+    private char language;
+
+    /**
+     * The Strong's Number.
+     */
+    private short strongsNumber;
+
+    /**
+     * The pattern of an acceptable strongs number.
+     */
+    private static final Pattern STRONGS_PATTERN = Pattern.compile("([GgHh])([0-9]+)"); //$NON-NLS-1$
+    private static final DecimalFormat ZERO_PAD = new DecimalFormat("0000"); //$NON-NLS-1$
+}


Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/book/study/StrongsNumber.java
___________________________________________________________________
Name: svn:keywords
   + Author Date Id Revision
Name: svn:eol-style
   + native

Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java	2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,46 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ *     The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.KeywordTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * A specialized analyzer that normalizes JSword keys.
+ *
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class KeyAnalyzer extends Analyzer
+{
+    /* (non-Javadoc)
+     * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
+     */
+    public TokenStream tokenStream(String fieldName, Reader reader)
+    {
+        return new KeyFilter(new KeywordTokenizer(reader));
+    }
+}


Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyAnalyzer.java
___________________________________________________________________
Name: svn:keywords
   + Author Date Id Revision
Name: svn:eol-style
   + native

Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java	2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,55 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ *     The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * A KeyFilter normalizes Keys.
+ *
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class KeyFilter extends TokenFilter
+{
+    /**
+     * Construct filtering <i>in</i>.
+     */
+    public KeyFilter(TokenStream in)
+    {
+      super(in);
+    }
+
+    /* (non-Javadoc)
+     * @see org.apache.lucene.analysis.TokenStream#next()
+     */
+    public final Token next() throws IOException
+    {
+        // TODO(DMS): actually normalize
+        return input.next();
+    }
+}


Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/KeyFilter.java
___________________________________________________________________
Name: svn:keywords
   + Author Date Id Revision
Name: svn:eol-style
   + native

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java	2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java	2007-06-01 18:27:01 UTC (rev 1376)
@@ -24,10 +24,9 @@
 import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
 import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
 
 /**
  * A specialized analyzer for Books that analyzes different fields differently.
@@ -41,33 +40,23 @@
 
     public LuceneAnalyzer()
     {
+        // The default analysis
+        analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
+
+        // Keywords are normalized to osisIDs
+        analyzer.addAnalyzer(LuceneIndex.FIELD_KEY, new KeyAnalyzer());
+
+        // Strong's Numbers are normalized to a consistent representation
+        analyzer.addAnalyzer(LuceneIndex.FIELD_STRONG, new StrongsNumberAnalyzer());
+
+        // XRefs are normalized from ranges into a list of osisIDs
+        analyzer.addAnalyzer(LuceneIndex.FIELD_XREF, new XRefAnalyzer());
     }
 
     public TokenStream tokenStream(String fieldName, Reader reader)
     {
-        // do not tokenize keys
-        if (LuceneIndex.FIELD_KEY.equals(fieldName))
-        {
-            return KEYWORD.tokenStream(fieldName, reader);
-        }
-        // Split Strong's Numbers on whitespace
-        else if (LuceneIndex.FIELD_STRONG.equals(fieldName))
-        {
-            return WHITESPACE.tokenStream(fieldName, reader);
-        }
-        // Split xrefs's on whitespace
-        else if (LuceneIndex.FIELD_XREF.equals(fieldName))
-        {
-            return WHITESPACE.tokenStream(fieldName, reader);
-        }
-        // just use the standard tokenizer
-        else
-        {
-            return SIMPLE.tokenStream(fieldName, reader);
-        }
+        return analyzer.tokenStream(fieldName, reader);
     }
 
-    private static final Analyzer KEYWORD = new KeywordAnalyzer();
-    private static final Analyzer WHITESPACE = new WhitespaceAnalyzer();
-    private static final Analyzer SIMPLE = new SimpleAnalyzer();
+    private PerFieldAnalyzerWrapper analyzer;
 }

Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java	2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,46 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ *     The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+
+/**
+ * A specialized analyzer that normalizes Strong's Numbers.
+ *
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class StrongsNumberAnalyzer extends Analyzer
+{
+    /* (non-Javadoc)
+     * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
+     */
+    public TokenStream tokenStream(String fieldName, Reader reader)
+    {
+        return new StrongsNumberFilter(new WhitespaceTokenizer(reader));
+    }
+}


Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberAnalyzer.java
___________________________________________________________________
Name: svn:keywords
   + Author Date Id Revision
Name: svn:eol-style
   + native

Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java	2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,74 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ *     The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.crosswire.jsword.book.BookException;
+import org.crosswire.jsword.book.study.StrongsNumber;
+
+/**
+ * A StrongsNumberFilter normalizes Strong's Numbers.
+ *
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class StrongsNumberFilter extends TokenFilter
+{
+    /**
+     * Construct filtering <i>in</i>.
+     */
+    public StrongsNumberFilter(TokenStream in)
+    {
+      super(in);
+    }
+
+    /* (non-Javadoc)
+     * @see org.apache.lucene.analysis.TokenStream#next()
+     */
+    public final Token next() throws IOException
+    {
+        Token token = input.next();
+        if (token == null)
+        {
+            return null;
+        }
+
+        try
+        {
+            String s = new StrongsNumber(token.termText()).getStrongsNumber();
+            if (!s.equals(token.termText()))
+            {
+                token.setTermText(s);
+            }
+            return token;
+        }
+        catch (BookException e)
+        {
+            throw new IOException(e.getDetailedMessage());
+        }
+    }
+}


Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/StrongsNumberFilter.java
___________________________________________________________________
Name: svn:keywords
   + Author Date Id Revision
Name: svn:eol-style
   + native

Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java	2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,46 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ *     The copyright to this program is held by it's authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+
+/**
+ * A specialized analyzer that normalizes cross references (OSISrefs).
+ *
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by it's authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class XRefAnalyzer extends Analyzer
+{
+    /* (non-Javadoc)
+     * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader)
+     */
+    public TokenStream tokenStream(String fieldName, Reader reader)
+    {
+        return new KeyFilter(new WhitespaceTokenizer(reader));
+    }
+}


Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefAnalyzer.java
___________________________________________________________________
Name: svn:keywords
   + Author Date Id Revision
Name: svn:eol-style
   + native

Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java	                        (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java	2007-06-01 18:27:01 UTC (rev 1376)
@@ -0,0 +1,55 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ *       http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ *      Free Software Foundation, Inc.
+ *      59 Temple Place - Suite 330
+ *      Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ *     The copyright to this program is held by its authors.
+ *
+ * ID: $Id$
+ */
+package org.crosswire.jsword.index.lucene;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * An XRefFilter is meant to normalize OSISrefs; for now it passes every token through unchanged.
+ *
+ * @see gnu.lgpl.License for license details.
+ *      The copyright to this program is held by its authors.
+ * @author DM Smith [dmsmith555 at yahoo dot com]
+ */
+public class XRefFilter extends TokenFilter
+{
+    /**
+     * Construct a filter over the token stream <i>in</i>.
+     */
+    public XRefFilter(TokenStream in)
+    {
+      super(in);
+    }
+
+    /* (non-Javadoc)
+     * @see org.apache.lucene.analysis.TokenStream#next()
+     */
+    public final Token next() throws IOException
+    {
+        // TODO(DMS): actually normalize; until then the next token of the wrapped stream is returned as-is
+        return input.next();
+    }
+}


Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/XRefFilter.java
___________________________________________________________________
Name: svn:keywords
   + Author Date Id Revision
Name: svn:eol-style
   + native

Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/package.html
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/package.html	2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/package.html	2007-06-01 18:27:01 UTC (rev 1376)
@@ -1,9 +1,9 @@
-<html>
-<body>
-
-<p>
-  Abstraction of an Index.
-</p>
-
-</body>
-</html>
+<html>
+<body>
+
+<p>
+  Abstraction of an Index.
+</p>
+
+</body>
+</html>


Property changes on: trunk/jsword/src/main/java/org/crosswire/jsword/index/package.html
___________________________________________________________________
Name: svn:keywords
   + Author Date Id Revision
Name: svn:eol-style
   + native

Modified: trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/Openness.java
===================================================================
--- trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/Openness.java	2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/Openness.java	2007-06-01 18:27:01 UTC (rev 1376)
@@ -38,27 +38,27 @@
     /**
      * If the data of unknown distribution status
      */
-    public static final Openness UNKNOWN = new Openness(Msg.OPEN_UNKNOWN);
+    public static final Openness UNKNOWN = new Openness(LimboMsg.OPEN_UNKNOWN);
 
     /**
      * If the data free of copyright restrictions
      */
-    public static final Openness PD = new Openness(Msg.OPEN_PD);
+    public static final Openness PD = new Openness(LimboMsg.OPEN_PD);
 
     /**
      * Does the data have a license that permits free use
      */
-    public static final Openness FREE = new Openness(Msg.OPEN_FREE);
+    public static final Openness FREE = new Openness(LimboMsg.OPEN_FREE);
 
     /**
      * Is the data freely redistributable
      */
-    public static final Openness COPYABLE = new Openness(Msg.OPEN_COPYABLE);
+    public static final Openness COPYABLE = new Openness(LimboMsg.OPEN_COPYABLE);
 
     /**
      * Is the data sold for commercial profit
      */
-    public static final Openness COMMERCIAL = new Openness(Msg.OPEN_COMMERCIAL);
+    public static final Openness COMMERCIAL = new Openness(LimboMsg.OPEN_COMMERCIAL);
 
     /**
      * Prevent anyone else from doing this

Modified: trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/Strongs.java
===================================================================
--- trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/Strongs.java	2007-06-01 14:41:49 UTC (rev 1375)
+++ trunk/jsword-limbo/src/main/java/org/crosswire/jsword/book/Strongs.java	2007-06-01 18:27:01 UTC (rev 1376)
@@ -58,7 +58,7 @@
                 // It's a Greek or Hebrew number
                 if (desc.charAt(desc.length() - 1) != '>')
                 {
-                    throw new BookException(Msg.STRONGS_ERROR_PARSE, new Object[] { desc });
+                    throw new BookException(LimboMsg.STRONGS_ERROR_PARSE, new Object[] { desc });
                 }
 
                 if (desc.charAt(1) == '0')
@@ -75,17 +75,17 @@
                 // It's a parsing number
                 if (desc.charAt(desc.length() - 1) != ')')
                 {
-                    throw new BookException(Msg.STRONGS_ERROR_PARSE, new Object[] { desc });
+                    throw new BookException(LimboMsg.STRONGS_ERROR_PARSE, new Object[] { desc });
                 }
 
                 set(PARSING, Integer.parseInt(desc.substring(1, desc.length() - 1)));
             }
 
-            throw new BookException(Msg.STRONGS_ERROR_PARSE, new Object[] { desc });
+            throw new BookException(LimboMsg.STRONGS_ERROR_PARSE, new Object[] { desc });
         }
         catch (NumberFormatException ex)
         {
-            throw new BookException(Msg.STRONGS_ERROR_NUMBER, new Object[] { desc });
+            throw new BookException(LimboMsg.STRONGS_ERROR_NUMBER, new Object[] { desc });
         }
     }
 
@@ -165,11 +165,11 @@
         switch (type)
         {
         case GREEK:
-            return Msg.STRONGS_GREEK.toString() + number;
+            return LimboMsg.STRONGS_GREEK.toString() + number;
         case HEBREW:
-            return Msg.STRONGS_HEBREW.toString() + number;
+            return LimboMsg.STRONGS_HEBREW.toString() + number;
         case PARSING:
-            return Msg.STRONGS_PARSING.toString() + number;
+            return LimboMsg.STRONGS_PARSING.toString() + number;
         default:
             assert false : type;
             return "!Error!"; //$NON-NLS-1$
@@ -247,14 +247,14 @@
         case HEBREW:
             if (number > HEBREW_MAX || number < 1)
             {
-                throw new BookException(Msg.STRONGS_ERROR_HEBREW, new Object[] { new Integer(HEBREW_MAX), new Integer(number) });
+                throw new BookException(LimboMsg.STRONGS_ERROR_HEBREW, new Object[] { new Integer(HEBREW_MAX), new Integer(number) });
             }
             break;
 
         case GREEK:
             if (number > GREEK_MAX || number < 1)
             {
-                throw new BookException(Msg.STRONGS_ERROR_GREEK, new Object[] { new Integer(GREEK_MAX), new Integer(number) });
+                throw new BookException(LimboMsg.STRONGS_ERROR_GREEK, new Object[] { new Integer(GREEK_MAX), new Integer(number) });
             }
             // We have not checked for 1418, 2717, 3203-3302, 4452 which do not appear to
             // but legal numbers for Greek words. Should we do this?
@@ -263,14 +263,14 @@
         case PARSING:
             if (number < 1)
             {
-                throw new BookException(Msg.STRONGS_ERROR_PARSING, new Object[] { new Integer(number) });
+                throw new BookException(LimboMsg.STRONGS_ERROR_PARSING, new Object[] { new Integer(number) });
             }
             // The correct range seems to be: 0, 5625-5773, 8675-8809, but not 5626, 5653, 5687, 5767, 8679
             // I'm not sure if this is 100% correct so I'll not check it at the mo.
             break;
 
         default:
-            throw new BookException(Msg.STRONGS_ERROR_TYPE, new Object[] { new Integer(number) });
+            throw new BookException(LimboMsg.STRONGS_ERROR_TYPE, new Object[] { new Integer(number) });
         }
     }
 




More information about the jsword-svn mailing list