[jsword-svn] jsword/java/jsword/org/crosswire/jsword/book s
jswordcvs at crosswire.org
jswordcvs at crosswire.org
Sun May 8 18:29:11 MST 2005
Update of /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book
In directory www.crosswire.org:/tmp/cvs-serv6194/java/jsword/org/crosswire/jsword/book
Modified Files:
DefaultBook.java BooksEvent.java
Added Files:
SentenceUtil.java
Removed Files:
SentanceUtil.java
Log Message:
Moved unused code to limbo.
Upgraded support-tools: checkstyle, pmd and findbugs to most recent.
Addressed over 100 issues reported by findbugs and checkstyle.
Resulted in major refactoring of GBFFilter.
Net result is that code size is significantly smaller.
--- SentanceUtil.java DELETED ---
Index: DefaultBook.java
===================================================================
RCS file: /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/DefaultBook.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** DefaultBook.java 21 Mar 2005 02:37:28 -0000 1.1
--- DefaultBook.java 9 May 2005 01:29:07 -0000 1.2
***************
*** 8,12 ****
/**
* Defines a single default book.
! * *
* <p><table border='1' cellPadding='3' cellSpacing='0'>
* <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
--- 8,12 ----
/**
* Defines a single default book.
! *
* <p><table border='1' cellPadding='3' cellSpacing='0'>
* <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
--- NEW FILE: SentenceUtil.java ---
package org.crosswire.jsword.book;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.crosswire.common.util.StringUtil;
/**
* The SentenceUtil class provides utility functions for the various Books.
*
* It is not designed to be used outside of the book package, so using it
* outside of these bounds is at your own risk.
*
* <p><table border='1' cellPadding='3' cellSpacing='0'>
* <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
*
* Distribution Licence:<br />
* JSword is free software; you can redistribute it
* and/or modify it under the terms of the GNU General Public License,
* version 2 as published by the Free Software Foundation.<br />
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.<br />
* The License is available on the internet
* <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
* Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
* MA 02111-1307, USA<br />
* The copyright to this program is held by its authors.
* </font></td></tr></table>
* @see gnu.gpl.Licence
* @author Joe Walker [joe at eireneh dot com]
* @version $Id: SentenceUtil.java,v 1.1 2005/05/09 01:29:07 dmsmith Exp $
*/
public class SentenceUtil
{
/**
* Ensure we can not be instansiated
*/
private SentenceUtil()
{
}
/**
* Take a string and tokenize it using " " and "--" as delimiters
* into an Array of Strings. There is a question mark over what to do
* with initial spaces. This algorithm disgards them, I'm not sure if
* this is the right thing to do.
* @param sentence The string to parse.
* @return The string array
*/
public static String[] tokenize(String sentence)
{
List tokens = new ArrayList();
int pos = 0;
String temp;
boolean alive = true;
while (alive)
{
// Find the next space and double dash
int nextSpace = sentence.indexOf(" ", pos); //$NON-NLS-1$
int nextDDash = sentence.indexOf("--", pos); //$NON-NLS-1$
// If there is a space just after the ddash then ignore the ddash
if (nextSpace == nextDDash + 2)
{
nextDDash = -1;
}
// If there is a ddash just after the space then ignore the space
if (nextDDash == nextSpace + 1)
{
nextSpace = -1;
}
// if there are no more tokens then just add in what we've got.
if (nextSpace == -1 && nextDDash == -1)
{
temp = sentence.substring(pos);
alive = false;
}
// Space is next if it is not -1 and it is less than ddash
else if ((nextSpace != -1 && nextSpace < nextDDash) || (nextDDash == -1))
{
// The next separator is a space
temp = sentence.substring(pos, nextSpace) + ' ';
pos = nextSpace + 1;
}
else
{
// The next separator is a ddash
temp = sentence.substring(pos, nextDDash) + "--"; //$NON-NLS-1$
pos = nextDDash + 2;
}
if (temp != null && !temp.trim().equals("")) //$NON-NLS-1$
{
tokens.add(temp);
}
}
// Create a String[]
String[] retcode = new String[tokens.size()];
int i = 0;
for (Iterator it = tokens.iterator(); it.hasNext(); )
{
retcode[i++] = (String) it.next();
}
return retcode;
}
/**
* From a sentence get a list of words (in original order) without
* any punctuation, and all in lower case.
* @param words Words with punctuation
* @return Words without punctuation
*/
public static String[] stripPunctuation(String[] words)
{
String[] retcode = new String[words.length];
// Remove the punctuation from the ends of the words.
for (int i = 0; i < words.length; i++)
{
retcode[i] = stripPunctuationWord(words[i]);
}
return retcode;
}
/**
* From a sentence get a list of words (in original order) without
* any punctuation, and all in lower case.
* @param words Words with punctuation
* @return Punctuation without words
*/
public static String[] stripWords(String[] words)
{
if (words.length == 0)
{
return new String[0];
}
String[] retcode = new String[words.length + 1];
// The first bit of punctuation is what comes in front of the first word
int first = firstLetter(words[0]);
if (first == 0)
{
retcode[0] = ""; //$NON-NLS-1$
}
else
{
retcode[0] = words[0].substring(0, first);
}
// The rest of the words
for (int i = 1; i < words.length; i++)
{
retcode[i] = stripWords(words[i - 1], words[i]);
}
// The last bit of punctuation is what comes at the end of the last word
int last = lastLetter(words[words.length - 1]);
if (last == words[words.length - 1].length())
{
retcode[words.length] = ""; //$NON-NLS-1$
}
else
{
retcode[words.length] = words[words.length - 1].substring(last + 1);
}
return retcode;
}
/**
* From a sentence get a list of words (in original order) without
* any punctuation, and all in lower case.
* @param sentence The string to parse.
* @return The words split up as an array
*/
public static String[] getWords(String sentence)
{
// First there are some things we regard as word delimitters even if
// they are not near space. Note that "-" should not be in this list
// because words like abel-beth-maiacha comtain them.
sentence = sentence.replaceAll("--", " "); //$NON-NLS-1$ //$NON-NLS-2$
sentence = sentence.replace('.', ' ');
sentence = sentence.replace('!', ' ');
sentence = sentence.replace('?', ' ');
sentence = sentence.replace(':', ' ');
sentence = sentence.replace(';', ' ');
sentence = sentence.replace('"', ' ');
sentence = sentence.replace('\'', ' ');
sentence = sentence.replace('(', ' ');
sentence = sentence.replace(')', ' ');
String[] words = StringUtil.split(sentence, " "); //$NON-NLS-1$
String[] retcode = new String[words.length];
// Remove the punctuation from the ends of the words.
for (int i = 0; i < words.length; i++)
{
retcode[i] = stripPunctuationWord(words[i]).toLowerCase();
}
return retcode;
}
/**
* Remove the punctuation from the ends of the word
* @param word Word with punctuation
* @return Word without punctuation
*/
public static String stripPunctuationWord(String word)
{
int first = firstLetter(word);
int last = lastLetter(word) + 1;
if (first > last)
{
return word;
}
return word.substring(first, last);
}
/**
* Remove the punctuation from the ends of the word. The special
* case is that if the first word ends "--" and the last word has
* no punctuation at the beginning, then the answer is "--" and not
* "-- ". We miss out the space because "--" is a special separator.
* @param first The word to grab the punctuation from the end of
* @param last The word to grab the punctuation from the start of
* @return The end of the first, a space, and the end of the first
*/
public static String stripWords(String first, String last)
{
String init1 = first.substring(lastLetter(first) + 1);
String init2 = last.substring(0, firstLetter(last));
return init1 + init2;
}
/**
* Where is the first letter in this word
* @param word The word to search for letters
* @return The offset of the first letter
*/
public static int firstLetter(String word)
{
int first;
for (first = 0; first < word.length(); first++)
{
char c = word.charAt(first);
if (Character.isLetterOrDigit(c))
{
break;
}
}
return first;
}
/**
* Where is the last letter in this word
* @param word The word to search for letters
* @return The offset of the last letter
*/
public static int lastLetter(String word)
{
int last;
for (last = word.length() - 1; last >= 0; last--)
{
char c = word.charAt(last);
if (Character.isLetterOrDigit(c))
{
break;
}
}
return last;
}
}
Index: BooksEvent.java
===================================================================
RCS file: /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/BooksEvent.java,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** BooksEvent.java 19 Mar 2005 01:56:47 -0000 1.7
--- BooksEvent.java 9 May 2005 01:29:07 -0000 1.8
***************
*** 68,72 ****
* The name of the changed Bible
*/
! private Book book;
/**
--- 68,72 ----
* The name of the changed Bible
*/
! private transient Book book;
/**
More information about the jsword-svn
mailing list