[jsword-svn] jsword/java/limbo/org/crosswire/jsword/book/search s

jswordcvs at crosswire.org jswordcvs at crosswire.org
Sun May 8 18:29:09 MST 2005


Update of /cvs/jsword/jsword/java/limbo/org/crosswire/jsword/book/search
In directory www.crosswire.org:/tmp/cvs-serv6194/java/limbo/org/crosswire/jsword/book/search

Added Files:
	Thesaurus.properties Grammar.java Thesaurus.java 
	Msg.properties Msg.java ThesaurusFactory.java 
Log Message:
Moved unused code to limbo.
Upgraded support-tools: checkstyle, pmd and findbugs to most recent.
Addressed over 100 issues reported by findbugs and checkstyle.
Resulted in major refactoring of GBFFilter.
Net result is that code size is significantly smaller.

--- NEW FILE: Thesaurus.java ---
package org.crosswire.jsword.book.search;

import java.util.Collection;

import org.crosswire.jsword.book.BookException;

/**
 * A source of synonym data for a given word.
 * 
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by its authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author Joe Walker [joe at eireneh dot com]
 * @version $Id: Thesaurus.java,v 1.1 2005/05/09 01:29:07 dmsmith Exp $
 */
public interface Thesaurus
{
    /**
     * Look up the synonyms of a word.
     * <p>NOTE(review): the comment previously attached to this method
     * described a prefix search (<code>getStartsWith</code>) and appears to
     * have been copied from elsewhere. The description here is inferred from
     * the method name and the interface summary, so confirm the details
     * against an implementation.
     * @param word The word to find synonyms for
     * @return The synonyms of the word; presumably a Collection of Strings
     *         -- TODO confirm the element type
     * @throws BookException presumably if the synonym data cannot be
     *         accessed -- TODO confirm
     */
    public Collection getSynonyms(String word) throws BookException;
}

--- NEW FILE: Thesaurus.properties ---

default=org.crosswire.jsword.book.search.lucene.LuceneThesarus

--- NEW FILE: Msg.java ---
package org.crosswire.jsword.book.search;

import org.crosswire.common.util.MsgBase;

/**
 * Compile safe Msg resource settings.
 * 
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by its authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author Joe Walker [joe at eireneh dot com]
 * @version $Id: Msg.java,v 1.1 2005/05/09 01:29:07 dmsmith Exp $
 */
class Msg extends MsgBase
{
    /**
     * Placeholder message. Its key, <code>Example.Example</code>, follows the
     * ClassName.MessageName convention used in Msg.properties.
     */
    static final Msg EXAMPLE = new Msg("Example.Example"); //$NON-NLS-1$

    /**
     * Passthrough ctor: hands the name straight to MsgBase.
     * It is private, so only this class can create instances.
     * @param name The resource key of the message; presumably MsgBase uses
     *        it to look up the text in Msg.properties -- TODO confirm
     */
    private Msg(String name)
    {
        super(name);
    }
}

--- NEW FILE: Grammar.java ---
package org.crosswire.jsword.book.search;

import org.crosswire.common.util.StringUtil;

/**
 * A class representing various grammatical constructs (in English).
 *
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by its authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author Joe Walker [joe at eireneh dot com]
 * @version $Id: Grammar.java,v 1.1 2005/05/09 01:29:07 dmsmith Exp $
 */
public final class Grammar
{
    /**
     * Prevent instantiation
     */
    private Grammar()
    {
    }

    /**
     * Strip off a known ending (see <code>ENDINGS</code>) to leave a root word.
     * Only the first ending that matches is removed. A word that consists of
     * nothing but an ending (for example "s" or "ed") is returned unchanged
     * rather than being reduced to an empty string.
     * This class may not be the best place for this code, however I'm not
     * sure if we have a better place for it at the moment. Maybe it should be
     * in passage.PassageUtil?
     * @param word The word to strip, must not be null
     * @return The root word
     */
    public static String getRoot(String word)
    {
        for (int i = 0; i < ENDINGS.length; i++)
        {
            String ending = ENDINGS[i];
            if (word.endsWith(ending))
            {
                // Stripping the ending from a word that is only the ending
                // would leave an empty root, which is of no use to a caller.
                if (word.length() == ending.length())
                {
                    return word;
                }

                // Make the assumption that we never have 2 endings on a word
                return word.substring(0, word.length() - ending.length());
            }
        }

        return word;
    }

    /**
     * Is this word one of those small words that can slaughter a DB
     * query. An empty string IS a small word. Surrounding whitespace is
     * ignored and the comparison is case insensitive.
     * @param word The word to test, must not be null
     * @return true if the word is blank or is listed in <code>WORD_FREQ</code>
     */
    public static boolean isSmallWord(String word)
    {
        String trimmed = word.trim();
        if (trimmed.equals("")) //$NON-NLS-1$
        {
            return true;
        }

        for (int i = 0; i < WORD_FREQ.length; i++)
        {
            if (trimmed.equalsIgnoreCase(WORD_FREQ[i]))
            {
                return true;
            }
        }

        return false;
    }

    /**
     * Remove the small words (see {@link #isSmallWord(String)}) from an array
     * of words. The input array is not modified and the order of the
     * remaining words is preserved.
     * @param words The words to filter
     * @return A new array holding only the words that are not small words
     */
    public static String[] stripSmallWords(String[] words)
    {
        // Collect the long words in a single pass so that each word is only
        // tested once; the scratch array is as big as the worst case.
        String[] kept = new String[words.length];
        int count = 0;
        for (int i = 0; i < words.length; i++)
        {
            if (!isSmallWord(words[i]))
            {
                kept[count++] = words[i];
            }
        }

        // Trim the scratch array down to the number of long words found
        String[] retcode = new String[count];
        System.arraycopy(kept, 0, retcode, 0, count);

        return retcode;
    }

    /**
     * Split a sentence into words using StringUtil.split and leave out the
     * small words.
     * @param original The sentence to split up
     * @param delims The word separators
     * @return The long words in the string
     */
    public static String[] tokenizeWithoutSmallWords(String original, String delims)
    {
        return stripSmallWords(StringUtil.split(original, delims));
    }

    /**
     * The Endings a word can have.
     * These are matched in order so there is no point in having "s"
     * before "es" because the second will not be tried.
     */
    private static final String[] ENDINGS =
    {
        "es", //$NON-NLS-1$
        "'s", //$NON-NLS-1$
        "s", //$NON-NLS-1$
        "ing", //$NON-NLS-1$
        "ed", //$NON-NLS-1$
        "er", //$NON-NLS-1$
        "ly", //$NON-NLS-1$
    };

    /**
     * The most used words, with the instance count of each noted alongside.
     * Entries that are commented out are frequent words that are NOT treated
     * as small words; presumably they are too meaningful as search terms to
     * be discarded.
     */
    private static final String[] WORD_FREQ =
    {
        // word     instance count (in AV & NIV)
        "the",      // 119135 //$NON-NLS-1$
        "and",      // 81244 //$NON-NLS-1$
        "of",       // 59417 //$NON-NLS-1$
        "to",       // 43624 //$NON-NLS-1$
        "in",       // 24233 //$NON-NLS-1$
        "he",       // 20088 //$NON-NLS-1$
        "that",     // 18672 //$NON-NLS-1$
        "i",        // 17605 //$NON-NLS-1$
        "a",        // 17439 //$NON-NLS-1$
        "for",      // 16780 //$NON-NLS-1$
        "you",      // 16324 //$NON-NLS-1$
        "his",      // 15438 //$NON-NLS-1$
//      "lord",     // 15319
        "is",       // 14304 //$NON-NLS-1$
        "will",     // 13981 //$NON-NLS-1$
        "they",     // 13942 //$NON-NLS-1$
        "not",      // 12507 //$NON-NLS-1$
        "with",     // 12125 //$NON-NLS-1$
        "him",      // 12058 //$NON-NLS-1$
        "it",       // 11834 //$NON-NLS-1$
        "be",       // 11638 //$NON-NLS-1$
        "them",     // 11608 //$NON-NLS-1$
        "shall",    // 10833 //$NON-NLS-1$
        "all",      // 10333 //$NON-NLS-1$
        "my",       // 9547 //$NON-NLS-1$
        "from",     // 9323 //$NON-NLS-1$
        "was",      // 8530 //$NON-NLS-1$
        "your",     // 8400 //$NON-NLS-1$
//      "god",      // 8381
        "have",     // 8322 //$NON-NLS-1$
        "me",       // 8102 //$NON-NLS-1$
        "but",      // 7991 //$NON-NLS-1$
        "their",    // 7638 //$NON-NLS-1$
        "as",       // 7521 //$NON-NLS-1$
        "who",      // 7425 //$NON-NLS-1$
        "said",     // 7198 //$NON-NLS-1$
        "are",      // 6981 //$NON-NLS-1$
        "on",       // 6914 //$NON-NLS-1$
        "this",     // 6558 //$NON-NLS-1$
        "when",     // 5667 //$NON-NLS-1$
        "thou",     // 5470 //$NON-NLS-1$
        "thy",      // 5469 //$NON-NLS-1$
        "by",       // 5434 //$NON-NLS-1$
        "were",     // 5192 //$NON-NLS-1$
        "had",      // 5109 //$NON-NLS-1$
        "then",     // 5105 //$NON-NLS-1$
        "out",      // 4778 //$NON-NLS-1$
//      "man",      // 4702
//      "son",      // 4701
        "so",       // 4689 //$NON-NLS-1$
//      "king",     // 4568
//      "israel",   // 4407
        "there",    // 4393 //$NON-NLS-1$
//      "people",   // 4355
        "which",    // 4253 //$NON-NLS-1$
        "do",       // 4032 //$NON-NLS-1$
        "one",      // 3998 //$NON-NLS-1$
        "ye",       // 3970 //$NON-NLS-1$
        "up",       // 3798 //$NON-NLS-1$
        "thee",     // 3780 //$NON-NLS-1$
        "at",       // 3767 //$NON-NLS-1$
        "we",       // 3725 //$NON-NLS-1$
        "her",      // 3583 //$NON-NLS-1$
        "what",     // 3545 //$NON-NLS-1$
        "men",      // 3482 //$NON-NLS-1$
        "come",     // 3404 //$NON-NLS-1$
        "if",       // 3380 //$NON-NLS-1$
        "into",     // 3284 //$NON-NLS-1$
        "came",     // 3283 //$NON-NLS-1$
//      "land",     // 3182
//      "day",      // 3168
        "upon",     // 3164 //$NON-NLS-1$
        "before",   // 3133 //$NON-NLS-1$
        "or",       // 3097 //$NON-NLS-1$
//      "house",    // 2997
        "us",       // 2886 //$NON-NLS-1$
        "because",  // 2879 //$NON-NLS-1$
        "go",       // 2869 //$NON-NLS-1$
//      "against",  // 2851
        "an",       // 2828 //$NON-NLS-1$
//      "no",       // 2711
        "went",     // 2597 //$NON-NLS-1$
        "also",     // 2586 //$NON-NLS-1$
        "now",      // 2571 //$NON-NLS-1$
        "let",      // 2548 //$NON-NLS-1$
//      "made",     // 2478
        "hath",     // 2450 //$NON-NLS-1$
        "may",      // 2418 //$NON-NLS-1$
        "has",      // 2406 //$NON-NLS-1$
        "our",      // 2361 //$NON-NLS-1$
        "these",    // 2356 //$NON-NLS-1$
//      "down",     // 2314
//      "hand",     // 2314
//      "jesus",    // 2255
//      "children", // 2231
//      "like",     // 2180
//      "over",     // 2091
        "o",        // 2090 //$NON-NLS-1$
//      "david",    // 2089
//      "father",   // 2065
        "am", //$NON-NLS-1$
    };
}

--- NEW FILE: Msg.properties ---
# The naming convention for the keys in the file is ClassName.MessageName
# Where ClassName is the name of the class using the property.
# When the resource is used by more than one class it should be the one
# with which the resource is most closely associated.
# The MessageName should be mixed case, with a leading capital.
# It should have no spaces or other punctuation (e.g. _, -, ', ...)

Example.Example=Example

--- NEW FILE: ThesaurusFactory.java ---
package org.crosswire.jsword.book.search;

import org.crosswire.common.util.ClassUtil;
import org.crosswire.common.util.Logger;

/**
 * Factory method for creating a new Thesaurus.
 * 
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by its authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author Joe Walker [joe at eireneh dot com]
 * @version $Id: ThesaurusFactory.java,v 1.1 2005/05/09 01:29:07 dmsmith Exp $
 */
public final class ThesaurusFactory
{
    /**
     * Prevent instantiation
     */
    private ThesaurusFactory()
    {
    }

    /**
     * Create a new Thesaurus.
     * The implementing class is whatever ClassUtil.getImplementor reports for
     * the Thesaurus interface, and it is created through its no-argument
     * constructor.
     * @return A newly created Thesaurus
     * @throws InstantiationException if the implementation could not be found
     *         or created; the underlying failure is logged and attached as
     *         the cause
     */
    public static Thesaurus createThesaurus() throws InstantiationException
    {
        try
        {
            Class impl = ClassUtil.getImplementor(Thesaurus.class);
            return (Thesaurus) impl.newInstance();
        }
        catch (Exception ex)
        {
            log.error("createThesaurus failed", ex); //$NON-NLS-1$

            // InstantiationException has no (String, Throwable) constructor,
            // so describe the failure in the message and chain it explicitly
            // to keep the real reason visible to the caller.
            InstantiationException failure = new InstantiationException(ex.toString());
            failure.initCause(ex);
            throw failure;
        }
    }

    /**
     * The log stream
     */
    private static final Logger log = Logger.getLogger(ThesaurusFactory.class);
}



More information about the jsword-svn mailing list