[jsword-svn] jsword/java/jsword/org/crosswire/jsword/book s

jswordcvs at crosswire.org jswordcvs at crosswire.org
Sun May 8 18:29:11 MST 2005


Update of /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book
In directory www.crosswire.org:/tmp/cvs-serv6194/java/jsword/org/crosswire/jsword/book

Modified Files:
	DefaultBook.java BooksEvent.java 
Added Files:
	SentenceUtil.java 
Removed Files:
	SentanceUtil.java 
Log Message:
Moved unused code to limbo.
Upgraded support-tools: checkstyle, pmd and findbugs to most recent.
Addressed over 100 issues reported by findbugs and checkstyle.
Resulted in major refactoring of GBFFilter.
Net result is that code size is significantly smaller.

--- SentanceUtil.java DELETED ---

Index: DefaultBook.java
===================================================================
RCS file: /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/DefaultBook.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** DefaultBook.java	21 Mar 2005 02:37:28 -0000	1.1
--- DefaultBook.java	9 May 2005 01:29:07 -0000	1.2
***************
*** 8,12 ****
  /**
   * Defines a single default book.
!  * *
   * <p><table border='1' cellPadding='3' cellSpacing='0'>
   * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
--- 8,12 ----
  /**
   * Defines a single default book.
!  *
   * <p><table border='1' cellPadding='3' cellSpacing='0'>
   * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>

--- NEW FILE: SentenceUtil.java ---
package org.crosswire.jsword.book;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.crosswire.common.util.StringUtil;

/**
 * The SentenceUtil class provides utility functions for the various Books.
 * 
 * It is not designed to be used outside of the book package, so using it
 * outside of these bounds is at your own risk.
 * 
 * <p><table border='1' cellPadding='3' cellSpacing='0'>
 * <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
 *
 * Distribution Licence:<br />
 * JSword is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.<br />
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.<br />
 * The License is available on the internet
 * <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 * MA 02111-1307, USA<br />
 * The copyright to this program is held by its authors.
 * </font></td></tr></table>
 * @see gnu.gpl.Licence
 * @author Joe Walker [joe at eireneh dot com]
 * @version $Id: SentenceUtil.java,v 1.1 2005/05/09 01:29:07 dmsmith Exp $
 */
public final class SentenceUtil
{
    /**
     * Ensure we can not be instantiated
     */
    private SentenceUtil()
    {
    }

    /**
     * Take a string and tokenize it using " " and "--" as delimiters
     * into an array of Strings. Every token keeps the delimiter that
     * terminated it (a trailing space or a trailing "--"); the last token
     * is whatever text remains after the final delimiter.
     * Tokens that consist of nothing but whitespace are dropped, so
     * initial spaces are discarded.
     * @param sentence The string to parse.
     * @return The tokens in their original order
     * @throws NullPointerException if sentence is null
     */
    public static String[] tokenize(String sentence)
    {
        List tokens = new ArrayList();

        int pos = 0;
        boolean alive = true;

        while (alive)
        {
            // Find the next space and double dash
            int nextSpace = sentence.indexOf(" ", pos); //$NON-NLS-1$
            int nextDDash = sentence.indexOf("--", pos); //$NON-NLS-1$

            // If there is a space just after the ddash then ignore the ddash,
            // so that "--" and the space end up in the same token.
            if (nextSpace == nextDDash + 2)
            {
                nextDDash = -1;
            }

            // If there is a ddash just after the space then ignore the space,
            // so that the space and "--" end up in the same token.
            if (nextDDash == nextSpace + 1)
            {
                nextSpace = -1;
            }

            String temp;
            if (nextSpace == -1 && nextDDash == -1)
            {
                // There are no more delimiters so just add in what we've got.
                temp = sentence.substring(pos);
                alive = false;
            }
            else if (nextDDash == -1 || (nextSpace != -1 && nextSpace < nextDDash))
            {
                // The next separator is a space
                temp = sentence.substring(pos, nextSpace) + ' ';
                pos = nextSpace + 1;
            }
            else
            {
                // The next separator is a ddash
                temp = sentence.substring(pos, nextDDash) + "--"; //$NON-NLS-1$
                pos = nextDDash + 2;
            }

            // Skip tokens that hold nothing but whitespace
            if (temp.trim().length() > 0)
            {
                tokens.add(temp);
            }
        }

        return (String[]) tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Strip the leading and trailing punctuation from each of the given
     * words. The order and the case of the words are left unchanged.
     * @param words Words with punctuation
     * @return Words without punctuation
     * @see #stripPunctuationWord(String)
     */
    public static String[] stripPunctuation(String[] words)
    {
        String[] retcode = new String[words.length];

        // Remove the punctuation from the ends of the words.
        for (int i = 0; i < words.length; i++)
        {
            retcode[i] = stripPunctuationWord(words[i]);
        }

        return retcode;
    }

    /**
     * The opposite of stripPunctuation: from a list of words get the
     * punctuation that surrounds them. For n words the result has n + 1
     * entries: the punctuation before the first word, the punctuation
     * between each pair of neighbouring words, and the punctuation after
     * the last word. An empty input gives an empty result.
     * @param words Words with punctuation
     * @return Punctuation without words
     */
    public static String[] stripWords(String[] words)
    {
        if (words.length == 0)
        {
            return new String[0];
        }

        String[] retcode = new String[words.length + 1];

        // The first bit of punctuation is what comes in front of the first
        // word. This is "" when the word starts with a letter or digit.
        String firstWord = words[0];
        retcode[0] = firstWord.substring(0, firstLetter(firstWord));

        // The punctuation between each pair of neighbouring words
        for (int i = 1; i < words.length; i++)
        {
            retcode[i] = stripWords(words[i - 1], words[i]);
        }

        // The last bit of punctuation is what comes at the end of the last
        // word. This is "" when the word ends with a letter or digit.
        String lastWord = words[words.length - 1];
        retcode[words.length] = lastWord.substring(lastLetter(lastWord) + 1);

        return retcode;
    }

    /**
     * From a sentence get a list of words (in original order) without
     * any punctuation, and all in lower case.
     * @param sentence The string to parse.
     * @return The words split up as an array
     */
    public static String[] getWords(String sentence)
    {
        // First there are some things we regard as word delimiters even if
        // they are not near space. Note that "-" should not be in this list
        // because words like abel-beth-maiacha contain them.
        String spaced = sentence.replaceAll("--", " "); //$NON-NLS-1$ //$NON-NLS-2$
        spaced = spaced.replace('.', ' ');
        spaced = spaced.replace('!', ' ');
        spaced = spaced.replace('?', ' ');
        spaced = spaced.replace(':', ' ');
        spaced = spaced.replace(';', ' ');
        spaced = spaced.replace('"', ' ');
        spaced = spaced.replace('\'', ' ');
        spaced = spaced.replace('(', ' ');
        spaced = spaced.replace(')', ' ');

        String[] words = StringUtil.split(spaced, " "); //$NON-NLS-1$
        String[] retcode = new String[words.length];

        // Remove the punctuation from the ends of the words. The case
        // conversion uses a fixed locale so that the result does not depend
        // on the default locale of the JVM (e.g. the Turkish dotless i).
        for (int i = 0; i < words.length; i++)
        {
            retcode[i] = stripPunctuationWord(words[i]).toLowerCase(java.util.Locale.ENGLISH);
        }

        return retcode;
    }

    /**
     * Remove the punctuation from the ends of the word. A non-empty word
     * that contains no letter or digit at all is returned unchanged.
     * @param word Word with punctuation
     * @return Word without punctuation
     */
    public static String stripPunctuationWord(String word)
    {
        int first = firstLetter(word);
        int last = lastLetter(word) + 1;

        // Only happens when the word has no letter or digit in it
        if (first > last)
        {
            return word;
        }

        return word.substring(first, last);
    }

    /**
     * Get the punctuation that lies between two neighbouring words: what
     * follows the last letter of the first word joined directly to what
     * precedes the first letter of the second word. No separator is
     * inserted between the two parts, so if the first word ends "--" and
     * the second word has no punctuation at the beginning, then the answer
     * is "--" and not "-- ".
     * @param first The word to grab the punctuation from the end of
     * @param last The word to grab the punctuation from the start of
     * @return The end of the first word followed by the start of the last
     */
    public static String stripWords(String first, String last)
    {
        String init1 = first.substring(lastLetter(first) + 1);
        String init2 = last.substring(0, firstLetter(last));

        return init1 + init2;
    }

    /**
     * Where is the first letter in this word. Digits count as letters.
     * @param word The word to search for letters
     * @return The offset of the first letter or digit, or the length of
     * the word if it contains none
     */
    public static int firstLetter(String word)
    {
        int first;

        for (first = 0; first < word.length(); first++)
        {
            char c = word.charAt(first);
            if (Character.isLetterOrDigit(c))
            {
                break;
            }
        }

        return first;
    }

    /**
     * Where is the last letter in this word. Digits count as letters.
     * @param word The word to search for letters
     * @return The offset of the last letter or digit, or -1 if the word
     * contains none
     */
    public static int lastLetter(String word)
    {
        int last;

        for (last = word.length() - 1; last >= 0; last--)
        {
            char c = word.charAt(last);
            if (Character.isLetterOrDigit(c))
            {
                break;
            }
        }

        return last;
    }
}

Index: BooksEvent.java
===================================================================
RCS file: /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/BooksEvent.java,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** BooksEvent.java	19 Mar 2005 01:56:47 -0000	1.7
--- BooksEvent.java	9 May 2005 01:29:07 -0000	1.8
***************
*** 68,72 ****
       * The name of the changed Bible
       */
!     private Book book;
  
      /**
--- 68,72 ----
       * The name of the changed Bible
       */
!     private transient Book book;
  
      /**



More information about the jsword-svn mailing list