[jsword-svn] r1701 - in trunk: bibledesktop/etc/installer/macosx/BibleDesktop.app/Contents bibledesktop/etc/jnlp bibledesktop/src/main/java/org/crosswire/bibledesktop/book bibledesktop/src/main/java/org/crosswire/bibledesktop/book/install bibledesktop/src/main/java/org/crosswire/bibledesktop/desktop common common/src/main/java/org/crosswire/common/config common/src/main/java/org/crosswire/common/diff common/src/main/java/org/crosswire/common/icu common/src/main/java/org/crosswire/common/util jsword jsword/jar/lucene-2.2.0 jsword/src/main/java/org/crosswire/jsword/book jsword/src/main/java/org/crosswire/jsword/book/basic jsword/src/main/java/org/crosswire/jsword/book/sword jsword/src/main/java/org/crosswire/jsword/index/lucene jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis jsword/src/main/java/org/crosswire/jsword/passage jsword/src/test/java jsword/src/test/java/org/crosswire/jsword jsword/src/test/java/org/crosswire/jsword/index jsword/src/test/java/org/crosswire/jsword/index/lucene jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis
dmsmith at www.crosswire.org
dmsmith at www.crosswire.org
Wed Oct 24 13:15:09 MST 2007
Author: dmsmith
Date: 2007-10-24 13:15:07 -0700 (Wed, 24 Oct 2007)
New Revision: 1701
Added:
trunk/jsword/jar/lucene-2.2.0/lucene-analyzers-2.2.0.jar
trunk/jsword/jar/lucene-2.2.0/lucene-snowball-2.2.0.jar
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.properties
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactory.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactory.properties
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/CzechLuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GermanLuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/SimpleLuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/package.html
trunk/jsword/src/test/java/org/crosswire/jsword/index/
trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/
trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/
trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactoryTest.java
trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzerTest.java
trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzerTest.java
trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzerTest.java
trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzerTest.java
trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzerTest.java
Modified:
trunk/bibledesktop/etc/installer/macosx/BibleDesktop.app/Contents/Info.plist
trunk/bibledesktop/etc/jnlp/bibledesktop.jnlp
trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/book/AdvancedSearchPane.java
trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/book/install/InternetWarning.java
trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/book/install/SitePane.java
trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/desktop/Translations.java
trunk/common/core.xml
trunk/common/src/main/java/org/crosswire/common/config/AbstractReflectedChoice.java
trunk/common/src/main/java/org/crosswire/common/diff/PatchEntry.java
trunk/common/src/main/java/org/crosswire/common/icu/NumberShaper.java
trunk/common/src/main/java/org/crosswire/common/util/ReflectionUtil.java
trunk/common/src/main/java/org/crosswire/common/util/ResourceUtil.java
trunk/jsword/.classpath
trunk/jsword/src/main/java/org/crosswire/jsword/book/BookmarkFactory.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/Books.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/basic/DefaultBookMetaData.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/basic/DefaultBookmark.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/BookType.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/ConfigEntryTable.java
trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/RawBackend.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java
trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java
trunk/jsword/src/main/java/org/crosswire/jsword/passage/Verse.java
trunk/jsword/src/main/java/org/crosswire/jsword/passage/VerseRange.java
trunk/jsword/src/test/java/JSwordAllTests.java
Log:
Code from Sijo Cherian for improved Lucene searching.
Modified: trunk/bibledesktop/etc/installer/macosx/BibleDesktop.app/Contents/Info.plist
===================================================================
--- trunk/bibledesktop/etc/installer/macosx/BibleDesktop.app/Contents/Info.plist 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/bibledesktop/etc/installer/macosx/BibleDesktop.app/Contents/Info.plist 2007-10-24 20:15:07 UTC (rev 1701)
@@ -42,6 +42,8 @@
<string>$JAVAROOT/jsword-common-swing- at release.version@.jar</string>
<string>$JAVAROOT/jsword-common-aqua-1.0.6.jar</string>
<string>$JAVAROOT/lucene-core-2.2.0.jar</string>
+ <string>$JAVAROOT/lucene-snowball-2.2.0.jar</string>
+ <string>$JAVAROOT/lucene-analyzers-2.2.0.jar</string>
<string>$JAVAROOT/icu4j_3_6_1.jar</string>
<string>$JAVAROOT/jdom-1.0.jar</string>
<string>$JAVAROOT/commons-codec-1.3.jar</string>
Modified: trunk/bibledesktop/etc/jnlp/bibledesktop.jnlp
===================================================================
--- trunk/bibledesktop/etc/jnlp/bibledesktop.jnlp 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/bibledesktop/etc/jnlp/bibledesktop.jnlp 2007-10-24 20:15:07 UTC (rev 1701)
@@ -40,6 +40,8 @@
<jar href="commons-logging-1.1.jar"/>
<jar href="commons-net-1.4.1.jar"/>
<jar href="lucene-core-2.2.0.jar"/>
+ <jar href="lucene-snowball-2.2.0.jar"/>
+ <jar href="lucene-analyzers-2.2.0.jar"/>
<jar href="icu4j_3_6_1.jar"/>
<jar href="jlfgr-1_0.jar"/>
<property name="apple.laf.useScreenMenuBar" value="true"/>
Modified: trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/book/AdvancedSearchPane.java
===================================================================
--- trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/book/AdvancedSearchPane.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/book/AdvancedSearchPane.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -298,7 +298,7 @@
Frame root = JOptionPane.getFrameForComponent(parent);
dlgMain = new JDialog(root);
dlgMain.setComponentOrientation(root.getComponentOrientation());
-
+
KeyStroke esc = KeyStroke.getKeyStroke(KeyEvent.VK_ESCAPE, 0);
bailout = true;
Modified: trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/book/install/InternetWarning.java
===================================================================
--- trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/book/install/InternetWarning.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/book/install/InternetWarning.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -121,13 +121,13 @@
public void doYes()
{
dialog.setVisible(false);
- choice = InternetWarning.GRANTED;
+ choice = InternetWarning.GRANTED;
}
public void doNo()
{
dialog.setVisible(false);
- choice = InternetWarning.DENIED;
+ choice = InternetWarning.DENIED;
}
/**
Modified: trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/book/install/SitePane.java
===================================================================
--- trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/book/install/SitePane.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/book/install/SitePane.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -477,7 +477,7 @@
size /= 1024.0F;
msg = Msg.MB_SIZE;
}
-
+
if (JOptionPane.showConfirmDialog(this, msg.toString(new Object[] {name.getName(), new Float(size)}),
Msg.CONFIRMATION_TITLE.toString(),
JOptionPane.YES_NO_OPTION) == JOptionPane.YES_OPTION)
Modified: trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/desktop/Translations.java
===================================================================
--- trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/desktop/Translations.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/bibledesktop/src/main/java/org/crosswire/bibledesktop/desktop/Translations.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -59,7 +59,7 @@
translation = DEFAULT_TRANSLATION;
}
}
-
+
/**
* All access to Translations is through this single instance.
*
Modified: trunk/common/core.xml
===================================================================
--- trunk/common/core.xml 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/common/core.xml 2007-10-24 20:15:07 UTC (rev 1701)
@@ -57,6 +57,8 @@
<include name="**/commons-logging-*.jar"/>
<include name="**/commons-net-*.jar"/>
<include name="**/lucene-core-*.jar"/>
+ <include name="**/lucene-analyzers-*.jar"/>
+ <include name="**/lucene-snowball-*.jar"/>
<include name="**/jlfgr-*.jar"/>
<include name="**/jsword-common-aqua-*.jar"/>
<include name="**/javatar-*.jar"/>
Modified: trunk/common/src/main/java/org/crosswire/common/config/AbstractReflectedChoice.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/config/AbstractReflectedChoice.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/common/src/main/java/org/crosswire/common/config/AbstractReflectedChoice.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -75,7 +75,7 @@
{
ignored = false;
}
-
+
String helpText = configResources.getString(key + ".help"); //$NON-NLS-1$
assert helpText != null;
setHelpText(helpText);
Modified: trunk/common/src/main/java/org/crosswire/common/diff/PatchEntry.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/diff/PatchEntry.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/common/src/main/java/org/crosswire/common/diff/PatchEntry.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -438,7 +438,7 @@
break;
default:
buf.append(c);
- }
+ }
}
return buf.toString();
}
@@ -452,7 +452,8 @@
{
int strlen = str.length();
StringBuffer buf = new StringBuffer(2 * strlen);
- for (int i = 0; i < strlen; i++)
+ int i = 0;
+ for (i = 0; i < strlen; i++)
{
char c = str.charAt(i);
if (c == '%')
Modified: trunk/common/src/main/java/org/crosswire/common/icu/NumberShaper.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/icu/NumberShaper.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/common/src/main/java/org/crosswire/common/icu/NumberShaper.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -131,7 +131,7 @@
{
return new String(dest);
}
-
+
return input;
}
@@ -285,12 +285,13 @@
// }
// }
- for (int i = 0, e = src.length; i < e; i++)
+ int len = src.length;
+ for (int i = 0; i < len; i++)
{
char c = text[i];
if (c >= zero && c <= nine)
{
- text[i] = (char)(c + offset);
+ text[i] = (char) (c + offset);
transformed[0] = true;
}
}
Modified: trunk/common/src/main/java/org/crosswire/common/util/ReflectionUtil.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/util/ReflectionUtil.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/common/src/main/java/org/crosswire/common/util/ReflectionUtil.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -72,10 +72,10 @@
* @throws InstantiationException
*/
public static Object construct(String className, Object[] params) throws ClassNotFoundException, NoSuchMethodException, IllegalAccessException, InvocationTargetException, InstantiationException
- {
+ {
Class[] paramTypes = describeParameters(params);
Class clazz = Class.forName(className);
- final Constructor c = clazz.getConstructor(paramTypes);
+ final Constructor c = clazz.getConstructor(paramTypes);
return c.newInstance(params);
}
@@ -93,14 +93,14 @@
* @throws InstantiationException
*/
public static Object construct(String className, Object[] params, Class[] paramTypes) throws ClassNotFoundException, NoSuchMethodException, IllegalAccessException, InvocationTargetException, InstantiationException
- {
+ {
Class[] calledTypes = paramTypes;
if (calledTypes == null)
{
calledTypes = describeParameters(params);
}
Class clazz = Class.forName(className);
- final Constructor c = clazz.getConstructor(calledTypes);
+ final Constructor c = clazz.getConstructor(calledTypes);
return c.newInstance(params);
}
Modified: trunk/common/src/main/java/org/crosswire/common/util/ResourceUtil.java
===================================================================
--- trunk/common/src/main/java/org/crosswire/common/util/ResourceUtil.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/common/src/main/java/org/crosswire/common/util/ResourceUtil.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -138,7 +138,7 @@
{
String lookup = subject + FileUtil.EXTENSION_PROPERTIES;
InputStream in = getResourceAsStream(clazz, lookup);
-
+
Properties prop = new Properties();
prop.load(in);
return prop;
Modified: trunk/jsword/.classpath
===================================================================
--- trunk/jsword/.classpath 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/jsword/.classpath 2007-10-24 20:15:07 UTC (rev 1701)
@@ -5,5 +5,7 @@
<classpathentry kind="src" path="src/main/java"/>
<classpathentry kind="src" path="src/test/java"/>
<classpathentry kind="lib" path="jar/lucene-2.2.0/lucene-core-2.2.0.jar" sourcepath="jar/lucene-2.2.0/lucene-2.2.0-src.zip"/>
+ <classpathentry kind="lib" path="jar/lucene-2.2.0/lucene-snowball-2.2.0.jar" />
+ <classpathentry kind="lib" path="jar/lucene-2.2.0/lucene-analyzers-2.2.0.jar" />
<classpathentry kind="output" path="bin"/>
</classpath>
Added: trunk/jsword/jar/lucene-2.2.0/lucene-analyzers-2.2.0.jar
===================================================================
(Binary files differ)
Property changes on: trunk/jsword/jar/lucene-2.2.0/lucene-analyzers-2.2.0.jar
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Added: trunk/jsword/jar/lucene-2.2.0/lucene-snowball-2.2.0.jar
===================================================================
(Binary files differ)
Property changes on: trunk/jsword/jar/lucene-2.2.0/lucene-snowball-2.2.0.jar
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/BookmarkFactory.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/BookmarkFactory.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/BookmarkFactory.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -25,7 +25,6 @@
import org.crosswire.common.util.ClassUtil;
import org.crosswire.common.util.Logger;
-import org.crosswire.jsword.index.IndexManager;
/**
* A Factory class for Bookmarks.
@@ -49,8 +48,7 @@
*/
public static Bookmark getBookmark()
{
- return (Bookmark)
- instance.clone();
+ return (Bookmark) instance.clone();
}
/**
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/Books.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/Books.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/Books.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -156,8 +156,11 @@
{
Book book = (Book) iter.next();
Object property = book.getProperty(propertyKey);
- String value = property instanceof String ? (String) property : property.toString();
- max = Math.max(max, value == null ? -1 : value.length());
+ if (property != null)
+ {
+ String value = property instanceof String ? (String) property : property.toString();
+ max = Math.max(max, value.length());
+ }
}
return max;
}
@@ -177,8 +180,11 @@
{
Book book = (Book) iter.next();
Object property = book.getProperty(propertyKey);
- String value = property instanceof String ? (String) property : property.toString();
- max = Math.max(max, value == null ? -1 : value.length());
+ if (property != null)
+ {
+ String value = property instanceof String ? (String) property : property.toString();
+ max = Math.max(max, value.length());
+ }
}
return max;
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/Msg.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -33,7 +33,7 @@
*/
final class Msg extends MsgBase
{
-
+
static final Msg BIBLE_NOTFOUND = new Msg("Defaults.BibleNotFound"); //$NON-NLS-1$
static final Msg DICTIONARY_NOTFOUND = new Msg("Defaults.DictionaryNotFound"); //$NON-NLS-1$
static final Msg COMMENTARY_NOTFOUND = new Msg("Defaults.CommentaryNotFound"); //$NON-NLS-1$
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/OSISUtil.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -979,7 +979,7 @@
}
return div.cloneContent();
}
-
+
public static List rtfToOsis(String rtf)
{
Element div = factory().createDiv();
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/basic/DefaultBookMetaData.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/basic/DefaultBookMetaData.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/basic/DefaultBookMetaData.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -41,7 +41,7 @@
/**
* DefaultBookMetaData is an implementation of the of the BookMetaData
- * interface. A less complete implementation design for imheritance is
+ * interface. A less complete implementation design for inheritance is
* available in AbstractBookMetaData where the complexity is in the setup rather
* than the inheritance. DefaultBookMetaData is probably the preferred
* implementation.
@@ -125,7 +125,7 @@
{
return false;
}
-
+
return ComponentOrientation.getOrientation(new Locale(lang)).isLeftToRight();
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/basic/DefaultBookmark.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/basic/DefaultBookmark.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/basic/DefaultBookmark.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -123,7 +123,16 @@
*/
public Object clone()
{
- return null;
+ Object clone = null;
+ try
+ {
+ clone = super.clone();
+ }
+ catch (CloneNotSupportedException e)
+ {
+ assert false : e;
+ }
+ return clone;
}
/**
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/BookType.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/BookType.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/BookType.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -70,7 +70,7 @@
protected AbstractBackend getBackend(SwordBookMetaData sbmd) throws BookException
{
- BlockType blockType = BlockType.fromString((String)sbmd.getProperty(ConfigEntryType.BLOCK_TYPE));
+ BlockType blockType = BlockType.fromString((String) sbmd.getProperty(ConfigEntryType.BLOCK_TYPE));
return new ZVerseBackend(sbmd, blockType);
}
@@ -130,7 +130,7 @@
protected AbstractBackend getBackend(SwordBookMetaData sbmd) throws BookException
{
- BlockType blockType = BlockType.fromString((String)sbmd.getProperty(ConfigEntryType.BLOCK_TYPE));
+ BlockType blockType = BlockType.fromString((String) sbmd.getProperty(ConfigEntryType.BLOCK_TYPE));
return new ZVerseBackend(sbmd, blockType);
}
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/ConfigEntryTable.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/ConfigEntryTable.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/ConfigEntryTable.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -607,7 +607,7 @@
testLanguage(internal, lang);
testLanguage(internal, langFrom);
testLanguage(internal, langTo);
-
+
// The LANG field should match the GLOSSARY_FROM field
if (langFrom != null && !langFrom.equals(lang))
{
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/RawBackend.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/RawBackend.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/book/sword/RawBackend.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -165,7 +165,7 @@
}
int entrysize = datasize + OFFSETSIZE;
-
+
// Read the next entrysize byes.
byte[] read = SwordUtil.readRAF(idxRaf[testament], index * entrysize, entrysize);
if (read == null || read.length == 0)
@@ -267,5 +267,5 @@
/**
* How many bytes in the size count in the index
*/
- private int datasize = -1;
+ private int datasize = -1;
}
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,106 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by its authors.
+ *
+ * ID: $Id: KeyAnalyzer.java 1376 2007-06-01 18:27:01Z dmsmith $
+ */
+package org.crosswire.jsword.index.lucene;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.crosswire.common.util.Logger;
+import org.crosswire.common.util.ResourceUtil;
+
+/**
+ * A singleton that reads and maintains the Lucene index meta-data found in the
+ * IndexMetadata properties file. Every version number in that file must parse as a float.
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ *      The copyright to this program is held by its authors.
+ * @author Sijo Cherian [sijocherian at yahoo dot com]
+ */
+public class IndexMetadata
+{
+    /**
+     * Loads the meta-data. If it cannot be read the error is logged and an empty
+     * set of properties is used, so that the getters fall back to their defaults.
+     */
+    private IndexMetadata()
+    {
+        Properties loaded;
+        try
+        {
+            loaded = ResourceUtil.getProperties(getClass());
+        }
+        catch (IOException e)
+        {
+            log.error("Property file read error", e); //$NON-NLS-1$
+            loaded = new Properties();
+        }
+        props = loaded;
+    }
+
+    /**
+     * All access to IndexMetadata is through this single instance.
+     *
+     * @return the singleton instance
+     */
+    public static IndexMetadata instance()
+    {
+        if (myInstance == null)
+        {
+            myInstance = new IndexMetadata();
+        }
+        return myInstance;
+    }
+
+    /**
+     * @return the version of the installed index, 1.1 when the property is absent
+     */
+    public float getInstalledIndexVersion()
+    {
+        return Float.parseFloat(props.getProperty(INDEX_VERSION, "1.1")); //$NON-NLS-1$
+    }
+
+    /**
+     * @return the Lucene version, 2.2 when the property is absent
+     */
+    public float getLuceneVersion()
+    {
+        return Float.parseFloat(props.getProperty(LUCENE_VERSION, "2.2")); //$NON-NLS-1$
+    }
+
+    /**
+     * @return the latest index version, 1.1 when the property is absent
+     */
+    public float getLatestIndexVersion()
+    {
+        return Float.parseFloat(props.getProperty(LATEST_INDEX_VERSION, "1.1")); //$NON-NLS-1$
+    }
+
+    public static final String INDEX_VERSION = "Installed.Index.Version"; //$NON-NLS-1$
+    public static final String LATEST_INDEX_VERSION = "Latest.Index.Version"; //$NON-NLS-1$
+    public static final String LUCENE_VERSION = "Lucene.Version"; //$NON-NLS-1$
+    public static final float INDEX_VERSION_1_1 = 1.1f;
+    public static final float INDEX_VERSION_1_2 = 1.2f;
+
+    private static final Logger log = Logger.getLogger(IndexMetadata.class);
+    private static IndexMetadata myInstance;
+    private final Properties props;
+}
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.properties
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.properties (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.properties 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,18 @@
+# Format Description: Each key is one piece of meta-data about the Lucene index.
+# All version numbers should be floats with one decimal place of precision.
+
+# Index Version History/Details:
+# 1.0 : Original index format. Uses: fields = key,content; Analyzer = SimpleAnalyzer
+# 1.1 : Added field = strong, heading, xref, note
+# 1.2 : Added natural language analysis (Stemming, CJK tokenization, optionally Stopword) for field = content
+# Uses AnalyzerFactory.properties to configure Analyzers
+
+# Index Version that is installed
+Installed.Index.Version=1.1
+
+#Not used right now
+Latest.Index.Version=1.2
+
+#Not used right now
+Lucene.Version=2.2
+
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneAnalyzer.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -27,10 +27,11 @@
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.TokenStream;
+import org.crosswire.jsword.index.lucene.analysis.AnalyzerFactory;
/**
* A specialized analyzer for Books that analyzes different fields differently.
- *
+ * Uses AnalyzerFactory when the installed index version is greater than 1.1.
* @see gnu.lgpl.License for license details.
* The copyright to this program is held by it's authors.
* @author DM Smith [dmsmith555 at yahoo dot com]
@@ -40,9 +41,22 @@
public LuceneAnalyzer()
{
+ this(AnalyzerFactory.DEFAULT_ID);
+ }
+
+ public LuceneAnalyzer(String naturalLanguageID)
+ {
// The default analysis
analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
+ if (IndexMetadata.instance().getInstalledIndexVersion() > IndexMetadata.INDEX_VERSION_1_1)
+ {
+ // Content is analyzed using natural language analyzer
+ // (stemming, stopword etc)
+ Analyzer myNaturalLanguageAnalyzer = AnalyzerFactory.getInstance().createAnalyzer(naturalLanguageID);
+ analyzer.addAnalyzer(LuceneIndex.FIELD_BODY, myNaturalLanguageAnalyzer);
+ }
+
// Keywords are normalized to osisIDs
analyzer.addAnalyzer(LuceneIndex.FIELD_KEY, new KeyAnalyzer());
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -119,7 +119,10 @@
Progress job = JobManager.createJob(Msg.INDEX_START.toString(), Thread.currentThread(), false);
IndexStatus finalStatus = IndexStatus.UNDONE;
- Analyzer analyzer = new LuceneAnalyzer();
+
+ String bookLang = book.getLanguage().getName();
+ Analyzer analyzer = new LuceneAnalyzer(bookLang);
+
List errors = new ArrayList();
File tempPath = new File(path + '.' + IndexStatus.CREATING.toString());
@@ -130,8 +133,7 @@
book.setIndexStatus(IndexStatus.CREATING);
- // An index is created by opening an IndexWriter with the
- // create argument set to true.
+ // An index is created by opening an IndexWriter with the create argument set to true.
//IndexWriter writer = new IndexWriter(tempPath.getCanonicalPath(), analyzer, true);
// Create the index in core.
@@ -206,10 +208,12 @@
{
try
{
+ String bookLang = book.getLanguage().getName();
+ Analyzer analyzer = new LuceneAnalyzer(bookLang);
- Analyzer analyzer = new LuceneAnalyzer();
QueryParser parser = new QueryParser(LuceneIndex.FIELD_BODY, analyzer);
Query query = parser.parse(search);
+ log.info("ParsedQuery-" + query.toString()); //$NON-NLS-1$
Hits hits = searcher.search(query);
// For ranking we use a PassageTally
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AbstractAnalyzer.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,111 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by its authors.
+ *
+ * ID: $Id: $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.StopFilter;
+
+/**
+ * Common base for analyzers; it holds the stop word, stemming and natural
+ * language settings that subclasses can read.
+ * Note: every analyzer configured in AnalyzerFactory.properties should extend this class.
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ *      The copyright to this program is held by its authors.
+ * @author sijo cherian [sijocherian at yahoo dot com]
+ */
+public abstract class AbstractAnalyzer extends Analyzer
+{
+    /**
+     * Starts with stop word removal off, stemming on and no natural language.
+     */
+    public AbstractAnalyzer()
+    {
+        this.naturalLanguage = null;
+        this.doStemming = true;
+        this.doStopWords = false;
+    }
+
+    /**
+     * @param doIt whether stop words are to be removed during analysis
+     */
+    public void setDoStopWords(boolean doIt)
+    {
+        this.doStopWords = doIt;
+    }
+
+    /**
+     * @return whether stop word removal is turned on
+     */
+    public boolean getDoStopWords()
+    {
+        return this.doStopWords;
+    }
+
+    /**
+     * Replaces the stop word set with one built from the given words.
+     *
+     * @param stopWords the words to build the stop set from
+     */
+    public void setStopWords(String[] stopWords)
+    {
+        this.stopSet = StopFilter.makeStopSet(stopWords);
+    }
+
+    /**
+     * @param stemming whether stemming is to be done
+     */
+    public void setDoStemming(boolean stemming)
+    {
+        this.doStemming = stemming;
+    }
+
+    /**
+     * @param lang the natural language of the text being analyzed
+     */
+    public void setNaturalLanguage(String lang)
+    {
+        this.naturalLanguage = lang;
+    }
+
+    /**
+     * @return the natural language of the text being analyzed, null until one is set
+     */
+    public String getNaturalLanguage()
+    {
+        return this.naturalLanguage;
+    }
+
+    /** The stop words built by setStopWords; not assigned before that call. */
+    protected Set stopSet;
+
+    /** Turns stop word removal during analysis on or off. */
+    protected boolean doStopWords;
+
+    /** Turns stemming on or off. */
+    protected boolean doStemming;
+
+    /** Natural language of the text that is being analyzed (optional). */
+    protected String naturalLanguage;
+}
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactory.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactory.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactory.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,150 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by its authors.
+ *
+ * ID: $Id: $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.crosswire.common.util.Logger;
+import org.crosswire.common.util.ResourceUtil;
+
+/**
+ * A factory creating the appropriate Analyzer for natural language analysis of text for Lucene
+ * Indexing and Query Parsing.
+ * Note: [Lang] refers to CommonName for ISO639 Language
+ * Dependency: Analyzer from lucene contrib: lucene-analyzers-[version].jar, lucene-snowball-[version].jar
+ *
+ * Properties used:
+ * <Key> : <Value>
+ * Default.Analyzer : The default analyzer class
+ * [Lang].Analyzer : Appropriate Analyzer class to be used for the language of the book
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ * The copyright to this program is held by it's authors.
+ * @author Sijo Cherian [sijocherian at yahoo dot com]
+ */
+public class AnalyzerFactory
+{
+ public AbstractAnalyzer createAnalyzer(String lang)
+ {
+ AbstractAnalyzer newObject = null;
+ if (lang != null)
+ {
+ String adjustLang = lang;
+ // Deal with non-standard language names
+ if (adjustLang.startsWith("Greek, Modern")) //$NON-NLS-1$
+ {
+ adjustLang = "Greek"; //$NON-NLS-1$
+ }
+
+ String aClass = getAnalyzerValue(adjustLang);
+
+ log.debug("Creating analyzer:" + aClass + " BookLang:" + adjustLang); //$NON-NLS-1$ //$NON-NLS-2$
+
+ if (aClass != null)
+ {
+ try
+ {
+ Class impl = Class.forName(aClass);
+
+ newObject = (AbstractAnalyzer) impl.newInstance();
+ }
+ catch (ClassNotFoundException e)
+ {
+ log.error("Configuration error in AnalyzerFactory properties", e); //$NON-NLS-1$
+ }
+ catch (IllegalAccessException e)
+ {
+ log.error("Configuration error in AnalyzerFactory properties", e); //$NON-NLS-1$
+ }
+ catch (InstantiationException e)
+ {
+ log.error("Configuration error in AnalyzerFactory properties", e); //$NON-NLS-1$
+ }
+ }
+ }
+
+ if (newObject == null)
+ {
+ newObject = new SimpleLuceneAnalyzer();
+ }
+
+ // Configure the analyzer
+ newObject.setDoStemming(getDefaultStemmingProperty());
+ newObject.setDoStopWords(getDefaultStopWordProperty());
+ newObject.setNaturalLanguage(lang);
+ return newObject;
+ }
+
+ public static AnalyzerFactory getInstance()
+ {
+ if (myInstance == null)
+ {
+ myInstance = new AnalyzerFactory();
+ }
+
+ return myInstance;
+ }
+
+ private AnalyzerFactory()
+ {
+ loadProperties();
+ }
+
+ public String getAnalyzerValue(String lang)
+ {
+ String key = lang + ".Analyzer"; //$NON-NLS-1$
+ return myProperties.getProperty(key);
+ }
+
+ public boolean getDefaultStemmingProperty()
+ {
+ String key = DEFAULT_ID + ".Stemming"; //$NON-NLS-1$
+ return Boolean.valueOf(myProperties.getProperty(key)).booleanValue();
+ }
+
+ public boolean getDefaultStopWordProperty()
+ {
+ String key = DEFAULT_ID + ".StopWord"; //$NON-NLS-1$
+ return Boolean.valueOf(myProperties.getProperty(key)).booleanValue();
+ }
+
+ private void loadProperties()
+ {
+
+ try
+ {
+ myProperties = ResourceUtil.getProperties(getClass());
+ }
+ catch (IOException e)
+ {
+ log.error("AnalyzerFactory property load from file failed", e); //$NON-NLS-1$
+ }
+ }
+
+ public static final String DEFAULT_ID = "Default"; //$NON-NLS-1$
+ private static final Logger log = Logger.getLogger(AnalyzerFactory.class);
+ private static AnalyzerFactory myInstance;
+
+ private Properties myProperties;
+
+}
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactory.properties
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactory.properties (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactory.properties 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,40 @@
+# Format Description:
+# Each key is of the form: Language.Property
+# Language : Book Language (case sensitive), Common Name for ISO639 Language
+# Each value is of the form:
+# For Stemming/StopWord : boolean (true/false)
+# For Analyzer : fully qualified class name of an AbstractAnalyzer implementation
+
+#Default properties
+# SimpleLuceneAnalyzer provides same function as org.apache.lucene.analysis.SimpleAnalyzer + Accent
+# normalization for ISO8859-1 languages
+Default.Analyzer=org.crosswire.jsword.index.lucene.analysis.SimpleLuceneAnalyzer
+Default.Stemming=true
+Default.StopWord=false
+
+#EnglishLuceneAnalyzer uses Porter stemming
+English.Analyzer=org.crosswire.jsword.index.lucene.analysis.EnglishLuceneAnalyzer
+
+# Custom Analyzers
+Czech.Analyzer=org.crosswire.jsword.index.lucene.analysis.CzechLuceneAnalyzer
+German.Analyzer=org.crosswire.jsword.index.lucene.analysis.GermanLuceneAnalyzer
+Greek.Analyzer=org.crosswire.jsword.index.lucene.analysis.GreekLuceneAnalyzer
+
+# Snowball Based Analyzers
+Danish.Analyzer=org.crosswire.jsword.index.lucene.analysis.ConfigurableSnowballAnalyzer
+Dutch.Analyzer=org.crosswire.jsword.index.lucene.analysis.ConfigurableSnowballAnalyzer
+Finnish.Analyzer=org.crosswire.jsword.index.lucene.analysis.ConfigurableSnowballAnalyzer
+French.Analyzer=org.crosswire.jsword.index.lucene.analysis.ConfigurableSnowballAnalyzer
+#German.Analyzer=org.crosswire.jsword.index.lucene.analysis.ConfigurableSnowballAnalyzer
+Italian.Analyzer=org.crosswire.jsword.index.lucene.analysis.ConfigurableSnowballAnalyzer
+Norwegian.Analyzer=org.crosswire.jsword.index.lucene.analysis.ConfigurableSnowballAnalyzer
+Portuguese.Analyzer=org.crosswire.jsword.index.lucene.analysis.ConfigurableSnowballAnalyzer
+Russian.Analyzer=org.crosswire.jsword.index.lucene.analysis.ConfigurableSnowballAnalyzer
+Spanish.Analyzer=org.crosswire.jsword.index.lucene.analysis.ConfigurableSnowballAnalyzer
+Swedish.Analyzer=org.crosswire.jsword.index.lucene.analysis.ConfigurableSnowballAnalyzer
+
+
+# Chinese Japanese Thai Languages
+Chinese.Analyzer=org.crosswire.jsword.index.lucene.analysis.ChineseLuceneAnalyzer
+Japanese.Analyzer=org.crosswire.jsword.index.lucene.analysis.ChineseLuceneAnalyzer
+Thai.Analyzer=org.crosswire.jsword.index.lucene.analysis.ThaiLuceneAnalyzer
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzer.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,55 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by its authors.
+ *
+ * ID: $Id: $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.cn.ChineseAnalyzer;
+
+/**
+ * Analyzer for Chinese text that hands all the work to
+ * org.apache.lucene.analysis.cn.ChineseAnalyzer
+ * Analysis: ChineseTokenizer, ChineseFilter
+ * StopFilter, Stemming not implemented yet
+ *
+ * Note: org.apache.lucene.analysis.cn.CJKAnalyzer takes overlapping two character tokenization approach
+ * which leads to larger index size.
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ *      The copyright to this program is held by its authors.
+ * @author Sijo Cherian [sijocherian at yahoo dot com]
+ */
+public class ChineseLuceneAnalyzer extends AbstractAnalyzer
+{
+    /**
+     * Creates the wrapped ChineseAnalyzer and marks the language as Chinese.
+     */
+    public ChineseLuceneAnalyzer()
+    {
+        delegate = new ChineseAnalyzer();
+        setNaturalLanguage("Chinese"); //$NON-NLS-1$
+    }
+
+    /**
+     * Tokenizes with the wrapped ChineseAnalyzer.
+     *
+     * @param fieldName the name of the field being analyzed
+     * @param reader the text to analyze
+     * @return the token stream produced by the wrapped analyzer
+     */
+    public final TokenStream tokenStream(String fieldName, Reader reader)
+    {
+        TokenStream stream = delegate.tokenStream(fieldName, reader);
+        return stream;
+    }
+
+    // The Lucene analyzer doing the actual work
+    private ChineseAnalyzer delegate;
+}
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzer.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,133 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by its authors.
+ *
+ * ID: $Id: $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.regex.Pattern;
+
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopAnalyzer;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.de.GermanAnalyzer;
+import org.apache.lucene.analysis.fr.FrenchAnalyzer;
+import org.apache.lucene.analysis.nl.DutchAnalyzer;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+
+/**
+ * Analyzer class to use as a Snowball Analyzer
+ * Default behavior: Stemming is done, Stop words not removed
+ * A snowball stemmer can be configured by passing the stemmer name to setNaturalLanguage() method.
+ * Currently it takes following stemmer names (available stemmers in lucene snowball package net.sf.snowball.ext)
+ * <pre>
+ *     Danish
+ *     Dutch
+ *     English
+ *     Finnish
+ *     French
+ *     German2
+ *     German
+ *     Italian
+ *     Kp
+ *     Lovins
+ *     Norwegian
+ *     Porter
+ *     Portuguese
+ *     Russian
+ *     Spanish
+ *     Swedish
+ * </pre>
+ * This list is expected to expand as the Snowball project supports more languages.
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ *      The copyright to this program is held by its authors.
+ * @author sijo cherian [sijocherian at yahoo dot com]
+ */
+public class ConfigurableSnowballAnalyzer extends AbstractAnalyzer
+{
+    /**
+     * Creates an analyzer without a stemmer; setNaturalLanguage() selects one.
+     */
+    public ConfigurableSnowballAnalyzer()
+    {
+    }
+
+    /**
+     * Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
+     * LowerCaseFilter}, {@link StopFilter} if enabled and
+     * {@link SnowballFilter}.
+     *
+     * @param fieldName the name of the field being analyzed (not used here)
+     * @param reader the text to analyze
+     * @return the filtered token stream
+     */
+    public final TokenStream tokenStream(String fieldName, Reader reader)
+    {
+        TokenStream result = new StandardTokenizer(reader);
+        result = new StandardFilter(result);
+        result = new LowerCaseFilter(result);
+        if (doStopWords && stopSet != null)
+        {
+            result = new StopFilter(result, stopSet);
+        }
+
+        // Configure Snowball filter based on language/stemmername
+        if (doStemming)
+        {
+            result = new SnowballFilter(result, stemmerName);
+        }
+
+        return result;
+    }
+
+    /**
+     * Selects the stemmer, and the default stop words if there are any for it.
+     * The name is checked before any state is changed, so a rejected name
+     * leaves the previous configuration intact.
+     *
+     * @param name one of the stemmer names listed in the class description
+     * @throws IllegalArgumentException if name is null or not an available stemmer
+     */
+    public void setNaturalLanguage(String name)
+    {
+        // Check for allowed stemmers
+        if (name == null || !allowedStemmers.matcher(name).matches())
+        {
+            throw new IllegalArgumentException("SnowballAnalyzer configured for unavailable stemmer " + name); //$NON-NLS-1$
+        }
+
+        naturalLanguage = name;
+        // stemmer name are same as language name, in most cases
+        stemmerName = name;
+
+        // Initialize the default stop words
+        if (defaultStopWordMap.containsKey(name))
+        {
+            stopSet = StopFilter.makeStopSet((String[]) defaultStopWordMap.get(name));
+        }
+    }
+
+    // The stemmer names accepted by setNaturalLanguage()
+    private static final Pattern allowedStemmers = Pattern.compile("(Danish|Dutch|English|Finnish|French|German2|German|Italian|Kp|Lovins|Norwegian|Porter|Portuguese|Russian|Spanish|Swedish)"); //$NON-NLS-1$
+
+    // Maps StemmerName > String array of standard stop words
+    private static final HashMap defaultStopWordMap = new HashMap();
+
+    // Name handed to SnowballFilter; null until setNaturalLanguage() succeeds
+    private String stemmerName;
+
+    static
+    {
+        defaultStopWordMap.put("French", FrenchAnalyzer.FRENCH_STOP_WORDS); //$NON-NLS-1$
+        defaultStopWordMap.put("German", GermanAnalyzer.GERMAN_STOP_WORDS); //$NON-NLS-1$
+        defaultStopWordMap.put("German2", GermanAnalyzer.GERMAN_STOP_WORDS); //$NON-NLS-1$
+        defaultStopWordMap.put("Dutch", DutchAnalyzer.DUTCH_STOP_WORDS); //$NON-NLS-1$
+        defaultStopWordMap.put("English", StopAnalyzer.ENGLISH_STOP_WORDS); //$NON-NLS-1$
+        defaultStopWordMap.put("Porter", StopAnalyzer.ENGLISH_STOP_WORDS); //$NON-NLS-1$
+
+    }
+}
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/CzechLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/CzechLuceneAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/CzechLuceneAnalyzer.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,68 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by its authors.
+ *
+ * ID: $Id: $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.cz.CzechAnalyzer;
+
+/**
+ * Uses org.apache.lucene.analysis.cz.CzechAnalyzer
+ * Analysis: standardTokenizer, StandardFilter, LowerCaseFilter, StopFilter
+ * Stemming not implemented yet
+ *
+ * Stop words are removed only while the stop word switch is on. Custom
+ * stop words given to setStopWords() are remembered and used in place of the
+ * CzechAnalyzer defaults whenever the switch is on.
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ *      The copyright to this program is held by its authors.
+ * @author Sijo Cherian [sijocherian at yahoo dot com]
+ */
+public class CzechLuceneAnalyzer extends AbstractAnalyzer
+{
+    /**
+     * Creates an analyzer that does not remove stop words.
+     */
+    public CzechLuceneAnalyzer()
+    {
+        // The stop word switch is off at this point, so no stop words are used
+        rebuildAnalyzer();
+        setNaturalLanguage("Czech"); //$NON-NLS-1$
+    }
+
+    /**
+     * Tokenizes with the wrapped CzechAnalyzer.
+     *
+     * @param fieldName the name of the field being analyzed
+     * @param reader the text to analyze
+     * @return the token stream produced by the wrapped analyzer
+     */
+    public final TokenStream tokenStream(String fieldName, Reader reader)
+    {
+        return myAnalyzer.tokenStream(fieldName, reader);
+    }
+
+    /**
+     * Sets the stop words to use while stop word removal is on.
+     *
+     * @param stopWords the custom stop words, or null to go back to the
+     *        defaults of CzechAnalyzer
+     */
+    public void setStopWords(String[] stopWords)
+    {
+        customStopWords = stopWords;
+        rebuildAnalyzer();
+    }
+
+    /**
+     * Turns stop word removal on or off.
+     *
+     * @param doIt true to remove stop words, false to keep every word
+     */
+    public void setDoStopWords(boolean doIt)
+    {
+        doStopWords = doIt;
+        rebuildAnalyzer();
+    }
+
+    /**
+     * Recreates the wrapped analyzer to match the stop word switch and the custom stop words.
+     */
+    private void rebuildAnalyzer()
+    {
+        if (!doStopWords)
+        {
+            // Construct Analyzer that do not use stop words
+            myAnalyzer = new CzechAnalyzer(new String[0]);
+        }
+        else if (customStopWords != null)
+        {
+            myAnalyzer = new CzechAnalyzer(customStopWords);
+        }
+        else
+        {
+            // Analyzer that uses its own default stop words
+            myAnalyzer = new CzechAnalyzer();
+        }
+    }
+
+    private CzechAnalyzer myAnalyzer;
+
+    // Stop words given by the caller; null means use the CzechAnalyzer defaults
+    private String[] customStopWords;
+}
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzer.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,73 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by its authors.
+ *
+ * ID: $Id: KeyAnalyzer.java 1376 2007-06-01 18:27:01Z dmsmith $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.LowerCaseTokenizer;
+import org.apache.lucene.analysis.PorterStemFilter;
+import org.apache.lucene.analysis.StopAnalyzer;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * English Analyzer works like lucene SimpleAnalyzer + Stemming.
+ * (LowerCaseTokenizer > PorterStemFilter).
+ * Like the AbstractAnalyzer, Stop word filter is off by default.
+ *
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ * The copyright to this program is held by it's authors.
+ * @author sijo cherian [sijocherian at yahoo dot com]
+ */
+public class EnglishLuceneAnalyzer extends AbstractAnalyzer
+{
+
+ public EnglishLuceneAnalyzer()
+ {
+ stopSet = StopFilter.makeStopSet(StopAnalyzer.ENGLISH_STOP_WORDS);
+ setNaturalLanguage("English"); //$NON-NLS-1$
+ }
+
+ /**
+ * Constructs a {@link LowerCaseTokenizer} filtered by a
+ * language filter {@link StopFilter} and {@link PorterStemFilter} for English.
+ */
+ public final TokenStream tokenStream(String fieldName, Reader reader)
+ {
+ TokenStream result = new LowerCaseTokenizer(reader);
+
+ if (doStopWords && stopSet != null)
+ {
+ result = new StopFilter(result, stopSet);
+ }
+
+ // Using Porter Stemmer
+ if (doStemming)
+ {
+ result = new PorterStemFilter(result);
+ }
+
+ return result;
+ }
+
+}
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GermanLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GermanLuceneAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GermanLuceneAnalyzer.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,64 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by its authors.
+ *
+ * ID: $Id: KeyAnalyzer.java 1376 2007-06-01 18:27:01Z dmsmith $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.LowerCaseTokenizer;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.de.GermanAnalyzer;
+import org.apache.lucene.analysis.de.GermanStemFilter;
+
+/**
+ * Based on Lucene's GermanAnalyzer
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ * The copyright to this program is held by it's authors.
+ * @author Sijo Cherian [sijocherian at yahoo dot com]
+ */
+public class GermanLuceneAnalyzer extends AbstractAnalyzer
+{
+
+ public TokenStream tokenStream(String fieldName, Reader reader)
+ {
+ TokenStream result = new LowerCaseTokenizer(reader);
+
+ if (doStopWords && stopSet != null)
+ {
+ result = new StopFilter(result, stopSet);
+ }
+
+ if (doStemming)
+ {
+ result = new GermanStemFilter(result);
+ }
+
+ return result;
+ }
+
+ public GermanLuceneAnalyzer()
+ {
+ stopSet = StopFilter.makeStopSet(GermanAnalyzer.GERMAN_STOP_WORDS);
+ setNaturalLanguage("German"); //$NON-NLS-1$
+ }
+}
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzer.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,69 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by its authors.
+ *
+ * ID: $Id: KeyAnalyzer.java 1376 2007-06-01 18:27:01Z dmsmith $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.el.GreekAnalyzer;
+import org.apache.lucene.analysis.el.GreekCharsets;
+
+/**
+ * Uses org.apache.lucene.analysis.el.GreekAnalyzer to do lowercasing and stopword(off by default).
+ * Stemming not implemented yet
+ *
+ * Stop words are removed only while the stop word switch is on. Custom
+ * stop words given to setStopWords() are remembered and used in place of the
+ * GreekAnalyzer defaults whenever the switch is on.
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ *      The copyright to this program is held by its authors.
+ * @author Sijo Cherian [sijocherian at yahoo dot com]
+ */
+public class GreekLuceneAnalyzer extends AbstractAnalyzer
+{
+    /**
+     * Creates an analyzer that does not remove stop words.
+     */
+    public GreekLuceneAnalyzer()
+    {
+        // The stop word switch is off at this point, so no stop words are used
+        rebuildAnalyzer();
+        setNaturalLanguage("Greek"); //$NON-NLS-1$
+    }
+
+    /**
+     * Tokenizes with the wrapped GreekAnalyzer.
+     *
+     * @param fieldName the name of the field being analyzed
+     * @param reader the text to analyze
+     * @return the token stream produced by the wrapped analyzer
+     */
+    public final TokenStream tokenStream(String fieldName, Reader reader)
+    {
+        return myAnalyzer.tokenStream(fieldName, reader);
+    }
+
+    /**
+     * Sets the stop words to use while stop word removal is on.
+     *
+     * @param stopWords the custom stop words, or null to go back to the
+     *        defaults of GreekAnalyzer
+     */
+    public void setStopWords(String[] stopWords)
+    {
+        customStopWords = stopWords;
+        rebuildAnalyzer();
+    }
+
+    /**
+     * Turns stop word removal on or off.
+     *
+     * @param doIt true to remove stop words, false to keep every word
+     */
+    public void setDoStopWords(boolean doIt)
+    {
+        doStopWords = doIt;
+        rebuildAnalyzer();
+    }
+
+    /**
+     * Recreates the wrapped analyzer to match the stop word switch and the custom stop words.
+     */
+    private void rebuildAnalyzer()
+    {
+        if (!doStopWords)
+        {
+            //Construct GreekAnalyzer that do not use stop words
+            myAnalyzer = new GreekAnalyzer(GreekCharsets.UnicodeGreek, new String[0]);
+        }
+        else if (customStopWords != null)
+        {
+            myAnalyzer = new GreekAnalyzer(GreekCharsets.UnicodeGreek, customStopWords);
+        }
+        else
+        {
+            //GreekAnalyzer that uses its own default stop words
+            myAnalyzer = new GreekAnalyzer();
+        }
+    }
+
+    private GreekAnalyzer myAnalyzer;
+
+    // Stop words given by the caller; null means use the GreekAnalyzer defaults
+    private String[] customStopWords;
+}
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/SimpleLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/SimpleLuceneAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/SimpleLuceneAnalyzer.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,68 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by its authors.
+ *
+ * ID: $Id: KeyAnalyzer.java 1376 2007-06-01 18:27:01Z dmsmith $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.Reader;
+import java.util.regex.Pattern;
+
+import org.apache.lucene.analysis.ISOLatin1AccentFilter;
+import org.apache.lucene.analysis.LowerCaseTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Simple Analyzer providing same function as org.apache.lucene.analysis.SimpleAnalyzer
+ * This is intended to be the default analyzer for natural language fields.
+ * Additionally performs:
+ * Normalize Diacritics (Changes Accented characters to their unaccented equivalent) for ISO 8859-1 languages
+ *
+ * Note: Next Lucene release (beyond 2.2.0) will have a major performance enhancement using method -
+ * public TokenStream reusableTokenStream(String fieldName, Reader reader)
+ * We should use that.
+ * Ref: https://issues.apache.org/jira/browse/LUCENE-969
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ *      The copyright to this program is held by its authors.
+ * @author Sijo Cherian [sijocherian at yahoo dot com]
+ */
+public class SimpleLuceneAnalyzer extends AbstractAnalyzer
+{
+
+    /**
+     * Creates the analyzer with the stemming switch off.
+     */
+    public SimpleLuceneAnalyzer()
+    {
+        doStemming = false;
+    }
+
+    /**
+     * Constructs a LowerCaseTokenizer and, when the natural language is one of
+     * the ISO 8859-1 languages known to this class, adds an ISOLatin1AccentFilter.
+     *
+     * @param fieldName the name of the field being analyzed (not used here)
+     * @param reader the text to analyze
+     * @return the token stream
+     */
+    public TokenStream tokenStream(String fieldName, Reader reader)
+    {
+        TokenStream lowered = new LowerCaseTokenizer(reader);
+
+        // Without a language there is nothing to decide: no accent filtering
+        if (naturalLanguage == null)
+        {
+            return lowered;
+        }
+
+        if (!isoLatin1Langs.matcher(naturalLanguage).matches())
+        {
+            return lowered;
+        }
+
+        return new ISOLatin1AccentFilter(lowered);
+    }
+
+    // Language names for which accents are normalized
+    private static Pattern isoLatin1Langs = Pattern.compile("(Afrikaans|Albanian|Basque|Breton|Catalan|Danish|Dutch|English|Estonian|Faroese|French|Finnish|Galician|German|Icelandic|Irish|Italian|Latin|Luxembourgish|Norwegian|Occitan|Portuguese|Romansh|Scottish Gaelic|Spanish|Swahili|Swedish|Walloon)"); //$NON-NLS-1$
+}
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzer.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzer.java (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzer.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,57 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by its authors.
+ *
+ * ID: $Id: KeyAnalyzer.java 1376 2007-06-01 18:27:01Z dmsmith $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.th.ThaiWordFilter;
+
+/**
+ * Tokenization using ThaiWordFilter. It uses java.text.BreakIterator to break words.
+ * Stemming: Not implemented
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ *      The copyright to this program is held by its authors.
+ * @author sijo cherian [sijocherian at yahoo dot com]
+ */
+public class ThaiLuceneAnalyzer extends AbstractAnalyzer
+{
+
+    /**
+     * Marks the language as Thai. No stop words are set here.
+     */
+    public ThaiLuceneAnalyzer()
+    {
+        setNaturalLanguage("Thai"); //$NON-NLS-1$
+    }
+
+    /**
+     * Constructs a StandardTokenizer filtered by a ThaiWordFilter, followed by
+     * a StopFilter when stop word removal is on and stop words have been set.
+     *
+     * @param fieldName the name of the field being analyzed (not used here)
+     * @param reader the text to analyze
+     * @return the filtered token stream
+     */
+    public TokenStream tokenStream(String fieldName, Reader reader)
+    {
+        TokenStream words = new ThaiWordFilter(new StandardTokenizer(reader));
+
+        if (!doStopWords || stopSet == null)
+        {
+            return words;
+        }
+
+        return new StopFilter(words, stopSet);
+    }
+}
Added: trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/package.html
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/package.html (rev 0)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/index/lucene/analysis/package.html 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,9 @@
+<html>
+<body>
+
+<p>
+ Implementation of various Lucene analyzers, providing language dependent customizations.
+</p>
+
+</body>
+</html>
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/passage/Verse.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/passage/Verse.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/passage/Verse.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -70,7 +70,7 @@
public Verse()
{
originalName = null;
-
+
book = DEFAULT.book;
chapter = DEFAULT.chapter;
verse = DEFAULT.verse;
Modified: trunk/jsword/src/main/java/org/crosswire/jsword/passage/VerseRange.java
===================================================================
--- trunk/jsword/src/main/java/org/crosswire/jsword/passage/VerseRange.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/jsword/src/main/java/org/crosswire/jsword/passage/VerseRange.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -156,7 +156,7 @@
{
return shaper.shape(rangeName);
}
-
+
return rangeName;
}
catch (NoSuchVerseException ex)
Modified: trunk/jsword/src/test/java/JSwordAllTests.java
===================================================================
--- trunk/jsword/src/test/java/JSwordAllTests.java 2007-10-19 14:44:26 UTC (rev 1700)
+++ trunk/jsword/src/test/java/JSwordAllTests.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -60,6 +60,7 @@
suite.addTestSuite(org.crosswire.jsword.book.BooksTest.class);
suite.addTestSuite(org.crosswire.jsword.book.BookMetaDataTest.class);
suite.addTestSuite(org.crosswire.jsword.book.SentanceUtilTest.class);
+
// run independently: suite.addTestSuite(org.crosswire.jsword.book.ReadEverything.class);
// commented out because the tests were very poor.
//suite.addTestSuite(org.crosswire.jsword.book.OsisTest.class);
@@ -68,6 +69,13 @@
suite.addTestSuite(org.crosswire.jsword.index.search.parse.ParserTest.class);
suite.addTestSuite(org.crosswire.jsword.index.search.parse.WordsTest.class);
*/
+
+ suite.addTestSuite(org.crosswire.jsword.index.lucene.analysis.AnalyzerFactoryTest.class);
+ suite.addTestSuite(org.crosswire.jsword.index.lucene.analysis.ChineseLuceneAnalyzerTest.class);
+ suite.addTestSuite(org.crosswire.jsword.index.lucene.analysis.ConfigurableSnowballAnalyzerTest.class);
+ suite.addTestSuite(org.crosswire.jsword.index.lucene.analysis.EnglishLuceneAnalyzerTest.class);
+ suite.addTestSuite(org.crosswire.jsword.index.lucene.analysis.GreekLuceneAnalyzerTest.class);
+ suite.addTestSuite(org.crosswire.jsword.index.lucene.analysis.ThaiLuceneAnalyzerTest.class);
return suite;
}
}
Added: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactoryTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactoryTest.java (rev 0)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/AnalyzerFactoryTest.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,129 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id: $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Query;
+
+/**
+ *
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ * The copyright to this program is held by it's authors.
+ * @author sijo cherian [sijocherian at yahoo dot com]
+ */
+public class AnalyzerFactoryTest extends TestCase
+{
+
+ /* (non-Javadoc)
+ * @see junit.framework.TestCase#setUp()
+ */
+ protected void setUp() throws Exception
+ {
+ super.setUp();
+ }
+
+ /* (non-Javadoc)
+ * @see junit.framework.TestCase#tearDown()
+ */
+ protected void tearDown() throws Exception
+ {
+ super.tearDown();
+ }
+
+ /**
+ * Test method for {@link org.crosswire.jsword.index.lucene.analysis.AnalyzerFactory#createAnalyzer(java.lang.String)}.
+ */
+ public void testCreateAnalyzer()
+ {
+ Analyzer myAnalyzer = AnalyzerFactory.getInstance().createAnalyzer(""); //$NON-NLS-1$
+ assertTrue(myAnalyzer!=null);
+
+ myAnalyzer = AnalyzerFactory.getInstance().createAnalyzer(null);
+ assertTrue(myAnalyzer!=null);
+ myAnalyzer = AnalyzerFactory.getInstance().createAnalyzer("Unknown"); //$NON-NLS-1$
+ assertTrue(myAnalyzer!=null);
+ }
+
+ public void testEngStemming() throws ParseException
+ {
+ AbstractAnalyzer myAnalyzer = new EnglishLuceneAnalyzer();
+
+ QueryParser parser = new QueryParser(field, myAnalyzer);
+
+ String testInput = "Surely will every man walketh"; //$NON-NLS-1$
+ Query query = parser.parse(testInput);
+ //assertTrue(myAnalyzer instanceof SimpleLuceneAnalyzer);
+
+ //After Diacritic filtering
+ assertTrue(query.toString().indexOf(field+":sure ") > -1); //$NON-NLS-1$
+ assertTrue(query.toString().indexOf(field+":everi") > -1); //$NON-NLS-1$
+
+ myAnalyzer.setDoStemming(false);
+ query = parser.parse(testInput);
+ assertTrue(query.toString().indexOf(field+":surely") > -1); //$NON-NLS-1$
+ assertTrue(query.toString().indexOf(field+":every") > -1); //$NON-NLS-1$
+
+ //enable stop word
+ myAnalyzer.setDoStopWords(true);
+ query = parser.parse(testInput);
+ assertTrue(query.toString().indexOf(field+":will") == -1); //$NON-NLS-1$
+
+ //set custom stop word
+ myAnalyzer.setDoStopWords(true);
+ String[] stopWords = {"thy", "ye","unto","shalt"}; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
+ myAnalyzer.setStopWords(stopWords);
+ testInput = "Upon thy belly Shalt thou go"; //$NON-NLS-1$
+ query = parser.parse(testInput);
+ assertTrue(query.toString().indexOf(field+":shalt") == -1); //$NON-NLS-1$
+ assertTrue(query.toString().indexOf(field+":thy") == -1); //$NON-NLS-1$
+ assertTrue(query.toString().indexOf(field+":upon") > -1); //$NON-NLS-1$
+
+
+ System.out.println(query.toString());
+ }
+
+ public void testLatin1Language() throws ParseException {
+ Analyzer myAnalyzer = AnalyzerFactory.getInstance().createAnalyzer("Latin"); //$NON-NLS-1$
+
+ QueryParser parser = new QueryParser(field, myAnalyzer);
+
+ String testInput = "test \u00D9\u00EB\u0153"; //$NON-NLS-1$
+ assertTrue(myAnalyzer instanceof SimpleLuceneAnalyzer);
+ Query query = parser.parse(testInput);
+ //After Diacritic filtering
+ assertTrue(query.toString().indexOf(field+":ueoe") > -1); //$NON-NLS-1$
+
+ testInput = "A\u00C1"; //$NON-NLS-1$
+ query = parser.parse(testInput);
+ //After Diacritic filtering
+ assertTrue(query.toString().indexOf(field+":aa") > -1); //$NON-NLS-1$
+
+
+ }
+
+ protected static final String field = "content"; //$NON-NLS-1$
+}
Added: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzerTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzerTest.java (rev 0)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ChineseLuceneAnalyzerTest.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,67 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id: $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Query;
+
+import junit.framework.TestCase;
+
+/**
+ * Tokenization and query parsing test
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ * The copyright to this program is held by it's authors.
+ * @author Sijo Cherian [sijocherian at yahoo dot com]
+ */
+public class ChineseLuceneAnalyzerTest extends TestCase
+{
+
+ protected void setUp() throws Exception
+ {
+ super.setUp();
+ }
+
+ protected void tearDown() throws Exception
+ {
+ super.tearDown();
+ }
+
+ public void testTokenization() throws ParseException
+ {
+ myAnalyzer = new ChineseLuceneAnalyzer();
+ parser = new QueryParser(field, myAnalyzer);
+
+ String testInput="\u795E\u7231\u4E16\u4EBA\uFF0C\u751A\u81F3\u628A\u4ED6\u7684\u72EC\u751F\u5B50\u8D50\u7ED9\u4ED6\u4EEC"; //$NON-NLS-1$
+
+
+ Query query = parser.parse(testInput);
+ assertTrue(query.toString().indexOf(field+":\"\u795E \u7231") > -1); //$NON-NLS-1$
+ assertTrue(query.toString().indexOf("\u4ED6 \u4EEC\"") > -1); //$NON-NLS-1$
+ //System.out.println(query.toString());
+ }
+
+ protected static final String field = "content"; //$NON-NLS-1$
+ private AbstractAnalyzer myAnalyzer;
+ private QueryParser parser;
+}
Added: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzerTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzerTest.java (rev 0)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ConfigurableSnowballAnalyzerTest.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,146 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id: $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Query;
+
+/**
+ * Snowball Analyzer test for stemming, stop word
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ * The copyright to this program is held by it's authors.
+ * @author Sijo Cherian [sijocherian at yahoo dot com]
+ */
+public class ConfigurableSnowballAnalyzerTest extends TestCase
+{
+
+ protected void setUp() throws Exception
+ {
+ super.setUp();
+ myAnalyzer = new ConfigurableSnowballAnalyzer();
+ parser = new QueryParser(field, myAnalyzer);
+ }
+
+ protected void tearDown() throws Exception
+ {
+ super.tearDown();
+ }
+
+ public void testStemmers()
+ {
+
+ //valid
+ myAnalyzer.setNaturalLanguage("Finnish"); //$NON-NLS-1$
+ //invalid stemmer
+ try {
+ myAnalyzer.setNaturalLanguage("test"); //$NON-NLS-1$
+ }
+ catch (IllegalArgumentException e) {
+ assertTrue (e.getMessage().indexOf("SnowballAnalyzer") > -1); //$NON-NLS-1$
+ }
+ }
+
+ public void testStemming() throws ParseException
+ {
+
+ myAnalyzer.setNaturalLanguage("French"); //$NON-NLS-1$
+
+ String testInput=" tant aimé le monde qu 'il a donné son"; //$NON-NLS-1$
+
+
+ Query query = parser.parse(testInput);
+ assertTrue(query.toString().indexOf(field+":aim ") > -1); //$NON-NLS-1$
+ assertTrue(query.toString().indexOf(field+":mond ") > -1); //$NON-NLS-1$
+ //System.out.println(query.toString());
+ }
+
+ public void testStopwords() throws ParseException
+ {
+
+ myAnalyzer.setNaturalLanguage("French"); //$NON-NLS-1$
+ myAnalyzer.setDoStopWords(true);
+ String testInput=" tant aimé le monde qu 'il a donné son"; //$NON-NLS-1$
+
+ Query query = parser.parse(testInput);
+ assertTrue(query.toString().indexOf(field+":le") == -1); //$NON-NLS-1$
+ assertTrue(query.toString().indexOf(field+":a ") == -1); //$NON-NLS-1$
+
+ }
+
+ public void testStemmingOff() throws ParseException
+ {
+
+ myAnalyzer.setNaturalLanguage("French"); //$NON-NLS-1$
+ myAnalyzer.setDoStemming(false);
+
+ String testInput=" tant aimé le monde qu 'il a donné son"; //$NON-NLS-1$
+
+
+ Query query = parser.parse(testInput);
+ //System.out.println(query.toString());
+ assertTrue(query.toString().indexOf(field+":aimé ") > -1); //$NON-NLS-1$
+ assertTrue(query.toString().indexOf(field+":donné ") > -1); //$NON-NLS-1$
+ }
+
+ public void testStemmerConfig() throws ParseException
+ {
+
+ myAnalyzer.setNaturalLanguage("French"); //$NON-NLS-1$
+ myAnalyzer.setDoStemming(false);
+
+ String testInput=" tant aimé le monde qu 'il a donné son"; //$NON-NLS-1$
+
+
+ Query query = parser.parse(testInput);
+ assertTrue(query.toString().indexOf(field+":aimé ") > -1); //$NON-NLS-1$
+ assertTrue(query.toString().indexOf(field+":donné ") > -1); //$NON-NLS-1$
+
+ }
+
+ public void testMultipleStemmers() throws ParseException
+ {
+
+ myAnalyzer.setNaturalLanguage("German"); //$NON-NLS-1$
+
+ String testInput="Denn also hat Gott die Welt geliebt, daß er seinen eingeborenen Sohn gab, auf daß jeder, der an ihn glaubt, nicht verloren gehe, sondern ewiges Leben habe"; //$NON-NLS-1$
+
+ Query query = parser.parse(testInput);
+ assertTrue(query.toString().indexOf(field+":denn ") > -1); //$NON-NLS-1$
+
+ //System.out.println(query.toString());
+
+ //Compare with custom analyzer
+ Analyzer anal= new GermanLuceneAnalyzer();
+ QueryParser gparser = new QueryParser(field, anal);
+ query = gparser.parse(testInput);
+ assertTrue(query.toString().indexOf(field+":denn ") > -1); //$NON-NLS-1$
+
+ }
+ protected static final String field = "content"; //$NON-NLS-1$
+ private AbstractAnalyzer myAnalyzer;
+ private QueryParser parser;
+}
Added: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzerTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzerTest.java (rev 0)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/EnglishLuceneAnalyzerTest.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,106 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id: $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Query;
+
+import junit.framework.TestCase;
+
+/**
+ * Test the English Analyzer
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ * The copyright to this program is held by it's authors.
+ * @author Sijo Cherian [sijocherian at yahoo dot com]
+ */
+public class EnglishLuceneAnalyzerTest extends TestCase
+{
+
+ protected void setUp() throws Exception
+ {
+ super.setUp();
+ myAnalyzer = new EnglishLuceneAnalyzer();
+
+ parser = new QueryParser(field, myAnalyzer);
+
+ }
+
+ protected void tearDown() throws Exception
+ {
+ super.tearDown();
+ }
+
+ public void testDefaultBehavior() throws ParseException
+ {
+ String testInput = "Surely will every man walketh"; //$NON-NLS-1$
+ Query query = parser.parse(testInput);
+
+ //stemming on
+ assertTrue(query.toString().indexOf(field+":sure ") > -1); //$NON-NLS-1$
+ assertTrue(query.toString().indexOf(field+":everi") > -1); //$NON-NLS-1$
+ }
+
+ public void testSetDoStopWords() throws ParseException
+ {
+ String testInput = "Surely will every man walketh"; //$NON-NLS-1$
+ Query query = parser.parse(testInput);
+
+ //enable stop word
+ myAnalyzer.setDoStopWords(true);
+ query = parser.parse(testInput);
+ assertTrue(query.toString().indexOf(field+":will") == -1); //$NON-NLS-1$
+
+ //set custom stop word
+ myAnalyzer.setDoStopWords(true);
+ String[] stopWords = {"thy", "ye","unto","shalt"}; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
+ myAnalyzer.setStopWords(stopWords);
+ testInput = "Upon thy belly Shalt thou go"; //$NON-NLS-1$
+ query = parser.parse(testInput);
+ //System.out.println("ParsedQuery- "+ query.toString());
+
+ assertTrue(query.toString().indexOf(field+":shalt") == -1); //$NON-NLS-1$
+ assertTrue(query.toString().indexOf(field+":thy") == -1); //$NON-NLS-1$
+ assertTrue(query.toString().indexOf(field+":upon") > -1); //$NON-NLS-1$
+
+
+ }
+
+ public void testSetDoStemming() throws ParseException
+ {
+ String testInput = "Surely will every man walketh"; //$NON-NLS-1$
+ Query query = parser.parse(testInput);
+
+ myAnalyzer.setDoStemming(false);
+ query = parser.parse(testInput);
+ assertTrue(query.toString().indexOf(field+":surely") > -1); //$NON-NLS-1$
+ assertTrue(query.toString().indexOf(field+":every") > -1); //$NON-NLS-1$
+
+ }
+
+
+
+ protected static final String field = "content"; //$NON-NLS-1$
+ private AbstractAnalyzer myAnalyzer;
+ private QueryParser parser;
+}
Added: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzerTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzerTest.java (rev 0)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/GreekLuceneAnalyzerTest.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,69 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id: $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Query;
+
+/**
+ * Test the Greek Analyzer
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ * The copyright to this program is held by it's authors.
+ * @author Sijo Cherian [sijocherian at yahoo dot com]
+ */
+public class GreekLuceneAnalyzerTest extends TestCase
+{
+
+ protected void setUp() throws Exception
+ {
+ super.setUp();
+ myAnalyzer = new GreekLuceneAnalyzer();
+
+ parser = new QueryParser(field, myAnalyzer);
+ }
+
+ protected void tearDown() throws Exception
+ {
+ super.tearDown();
+ }
+
+ public void testTokenization() throws ParseException
+ {
+ //From john 3:16
+
+ String testInput="\u0394\u03B9\u03BF\u03C4\u03B9 \u03C4\u03BF\u03C3\u03BF\u03BD \u03B7\u03B3\u03B1\u03C0\u03B7\u03C3\u03B5\u03BD \u03BF \u0398\u03B5\u03BF\u03C2 \u03C4\u03BF\u03BD \u03BA\u03BF\u03C3\u03BC\u03BF\u03BD\u002C \u03C9\u03C3\u03C4\u03B5 \u03B5\u03B4\u03C9\u03BA\u03B5 \u03C4\u03BF\u03BD \u03A5\u03B9\u03BF\u03BD \u03B1\u03C5\u03C4\u03BF\u03C5"; //$NON-NLS-1$
+ Query query = parser.parse(testInput);
+ //System.out.println(query.toString());
+ //Lowercased test
+ assertTrue(query.toString().indexOf(field + ":\u03B4\u03B9\u03BF\u03C4\u03B9 ") > -1); //$NON-NLS-1$
+ assertTrue(query.toString().indexOf(field + ":\u03B1\u03C5\u03C4\u03BF\u03C5") > -1); //$NON-NLS-1$
+
+ }
+
+ protected static final String field = "content"; //$NON-NLS-1$
+ private AbstractAnalyzer myAnalyzer;
+ private QueryParser parser;
+}
Added: trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzerTest.java
===================================================================
--- trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzerTest.java (rev 0)
+++ trunk/jsword/src/test/java/org/crosswire/jsword/index/lucene/analysis/ThaiLuceneAnalyzerTest.java 2007-10-24 20:15:07 UTC (rev 1701)
@@ -0,0 +1,79 @@
+/**
+ * Distribution License:
+ * JSword is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License, version 2.1 as published by
+ * the Free Software Foundation. This program is distributed in the hope
+ * that it will be useful, but WITHOUT ANY WARRANTY; without even the
+ * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU Lesser General Public License for more details.
+ *
+ * The License is available on the internet at:
+ * http://www.gnu.org/copyleft/lgpl.html
+ * or by writing to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place - Suite 330
+ * Boston, MA 02111-1307, USA
+ *
+ * Copyright: 2007
+ * The copyright to this program is held by it's authors.
+ *
+ * ID: $Id: $
+ */
+package org.crosswire.jsword.index.lucene.analysis;
+
+
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Query;
+
+import junit.framework.TestCase;
+
+/**
+ * Test the Thai Analyzer
+ *
+ * @see gnu.lgpl.License for license details.<br>
+ * The copyright to this program is held by it's authors.
+ * @author Sijo Cherian [sijocherian at yahoo dot com]
+ */
+public class ThaiLuceneAnalyzerTest extends TestCase
+{
+
+ protected void setUp() throws Exception
+ {
+ super.setUp();
+ myAnalyzer = new ThaiLuceneAnalyzer();
+
+ parser = new QueryParser(field, myAnalyzer);
+ }
+
+ protected void tearDown() throws Exception
+ {
+ super.tearDown();
+ }
+
+ public void testDefaultBehavior() throws ParseException
+ {
+ String testInput="\u0E1A\u0E38\u0E15\u0E23\u0E21\u0E19\u0E38\u0E29\u0E22\u0E4C\u0E08\u0E30\u0E15\u0E49\u0E2D"; //$NON-NLS-1$
+
+ Query query = parser.parse(testInput);
+ //System.out.println(query.toString());
+ assertTrue(query.toString().indexOf(field+":\"\u0E1A\u0E38\u0E15\u0E23 \u0E21") > -1); //$NON-NLS-1$
+ assertTrue(query.toString().indexOf("\u0E4C \u0E08\u0E30 \u0E15\u0E49\u0E2D") > -1); //$NON-NLS-1$
+ }
+
+ public void testWhitespaceQuery() throws ParseException
+ {
+ //From john 3:3
+ String testInput="\u0E40\u0E23\u0E32\u0E1A\u0E2D\u0E01\u0E04\u0E27\u0E32\u0E21\u0E08\u0E23\u0E34\u0E07\u0E41\u0E01\u0E48\u0E17\u0E48\u0E32\u0E19\u0E27\u0E48\u0E32 \u0E16\u0E49\u0E32\u0E1C\u0E39\u0E49\u0E43\u0E14\u0E44\u0E21\u0E48\u0E44\u0E14\u0E49\u0E1A\u0E31\u0E07\u0E40\u0E01\u0E34\u0E14\u0E43\u0E2B\u0E21\u0E48"; //$NON-NLS-1$
+
+ Query query = parser.parse(testInput);
+ //System.out.println(query.toString());
+ assertTrue(query.toString().indexOf(field + ":\"\u0E40\u0E23\u0E32 \u0E1A") > -1); //$NON-NLS-1$
+ assertTrue(query.toString().indexOf(field + ":\"\u0E16\u0E49\u0E32 \u0E1C") > -1); //$NON-NLS-1$
+ }
+
+
+ protected static final String field = "content"; //$NON-NLS-1$
+ private AbstractAnalyzer myAnalyzer;
+ private QueryParser parser;
+}
More information about the jsword-svn
mailing list