[jsword-svn]
jsword/java/jsword/org/crosswire/jsword/book/search/lucene s
jswordcvs at crosswire.org
jswordcvs at crosswire.org
Wed Sep 29 15:21:26 MST 2004
Update of /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/search/lucene
In directory www.crosswire.org:/tmp/cvs-serv8429/java/jsword/org/crosswire/jsword/book/search/lucene
Modified Files:
Msg.java Msg.properties
Added Files:
LuceneIndex.java
Removed Files:
LuceneSearchEngine.java
Log Message:
Fixes for [JS-7] and [JS-6]
Lots of search work and re-factoring
--- NEW FILE: LuceneIndex.java ---
package org.crosswire.jsword.book.search.lucene;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.crosswire.common.activate.Activatable;
import org.crosswire.common.activate.Activator;
import org.crosswire.common.activate.Lock;
import org.crosswire.common.progress.Job;
import org.crosswire.common.util.Logger;
import org.crosswire.common.util.NetUtil;
import org.crosswire.common.util.Reporter;
import org.crosswire.jsword.book.Book;
import org.crosswire.jsword.book.BookData;
import org.crosswire.jsword.book.BookException;
import org.crosswire.jsword.book.search.Index;
import org.crosswire.jsword.book.search.IndexManager;
import org.crosswire.jsword.passage.BibleInfo;
import org.crosswire.jsword.passage.Key;
import org.crosswire.jsword.passage.KeyUtil;
import org.crosswire.jsword.passage.NoSuchKeyException;
import org.crosswire.jsword.passage.NoSuchVerseException;
import org.crosswire.jsword.passage.PassageTally;
import org.crosswire.jsword.passage.Verse;
import org.crosswire.jsword.passage.VerseFactory;
import org.crosswire.jsword.util.Project;
/**
* Implement the SearchEngine using Lucene as the search engine.
*
* <p><table border='1' cellPadding='3' cellSpacing='0'>
* <tr><td bgColor='white' class='TableRowColor'><font size='-7'>
*
* Distribution Licence:<br />
* JSword is free software; you can redistribute it
* and/or modify it under the terms of the GNU General Public License,
* version 2 as published by the Free Software Foundation.<br />
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.<br />
* The License is available on the internet
* <a href='http://www.gnu.org/copyleft/gpl.html'>here</a>, or by writing to:
* Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
* MA 02111-1307, USA<br />
* The copyright to this program is held by its authors.
* </font></td></tr></table>
* @see gnu.gpl.Licence
* @author Joe Walker [joe at eireneh dot com]
* @version $Id: LuceneIndex.java,v 1.1 2004/09/29 22:21:24 joe Exp $
*/
public class LuceneIndex implements Index, Activatable
{
    /**
     * Bind this index to a Book and work out where its Lucene index lives.
     * The location is the URL returned by
     * <code>Project.instance().getTempScratchSpace(DIR_LUCENE, false)</code>
     * lengthened first by the book's driver name and then by its initials.
     * If an index is already present a searcher is opened straight away so
     * that a broken index is reported here.
     * @param newBook The Book that this index covers
     * @throws BookException If the existing index can not be opened
     */
    public void init(Book newBook) throws BookException
    {
        try
        {
            book = newBook;

            String driverName = book.getBookMetaData().getDriverName();
            String bookName = book.getBookMetaData().getInitials();

            assert driverName != null;
            assert bookName != null;

            URL base = Project.instance().getTempScratchSpace(DIR_LUCENE, false);
            URL driver = NetUtil.lengthenURL(base, driverName);
            url = NetUtil.lengthenURL(driver, bookName);

            if (isIndexed())
            {
                // Opening Lucene indexes is quite quick I think, so we can try
                // it to see if it works to report errors that we want to drop
                // later
                searcher = new IndexSearcher(NetUtil.getAsFile(url).getCanonicalPath());
            }
        }
        catch (IOException ex)
        {
            throw new BookException(Msg.LUCENE_INIT, ex);
        }
    }

    /**
     * Run a Lucene query against the body field and collect the hits.
     * Each hit's name field is parsed back into a Verse and added to a
     * PassageTally with the Lucene score scaled by 100 and truncated to an
     * int.
     * @param search The query string; null gives an empty result
     * @return The tally of matching verses, empty if there is no searcher
     * @throws BookException If parsing, searching or verse lookup fails
     */
    public Key findWord(String search) throws BookException
    {
        checkActive();

        PassageTally tally = new PassageTally();

        if (search == null)
        {
            return tally;
        }

        if (searcher == null)
        {
            log.warn("Missing searcher, skipping search for: "+search); //$NON-NLS-1$
            return tally;
        }

        try
        {
            Analyzer analyzer = new StandardAnalyzer();
            Query query = QueryParser.parse(search, LuceneIndex.FIELD_BODY, analyzer);
            Hits hits = searcher.search(query);

            for (int i = 0; i < hits.length(); i++)
            {
                Verse verse = VerseFactory.fromString(hits.doc(i).get(LuceneIndex.FIELD_NAME));
                int score = (int) (hits.score(i) * 100);
                tally.add(verse, score);
            }
        }
        catch (Exception ex)
        {
            throw new BookException(Msg.SEARCH_FAILED, ex);
        }

        return tally;
    }

    /**
     * Convert a name into a Key by delegating to the Book being indexed.
     * @param name The text to convert
     * @return The Key that the Book returns for the name
     * @throws NoSuchKeyException If the Book rejects the name
     */
    public Key getKey(String name) throws NoSuchKeyException
    {
        return book.getKey(name);
    }

    /**
     * Remove the index from disk. Any open searcher is closed first so that
     * we do not keep a handle on files that are about to be deleted.
     * @throws BookException If the searcher can not be closed or the files
     * can not be deleted
     */
    public void delete() throws BookException
    {
        checkActive();

        try
        {
            closeSearcher();
            NetUtil.delete(url);
        }
        catch (IOException ex)
        {
            throw new BookException(Msg.DELETE_FAILED, ex);
        }
    }

    /**
     * Is there an index on disk that we can use?
     * @return false while the generating flag is set, otherwise whether the
     * DIR_SEGMENTS entry exists as a file under the index location
     */
    public boolean isIndexed()
    {
        if (generating)
        {
            return false;
        }

        URL longer = NetUtil.lengthenURL(url, DIR_SEGMENTS);
        return NetUtil.isFile(longer);
    }

    /**
     * Build a fresh index for the whole Book, optimize it and then open a
     * searcher on the result.
     * @param job The progress reporter
     * @throws IOException If Lucene fails to write or open the index
     * @throws BookException If the Book data can not be read
     */
    public void generateSearchIndex(Job job) throws IOException, BookException
    {
        // An index is created by opening an IndexWriter with the
        // create argument set to true.
        IndexWriter writer = new IndexWriter(NetUtil.getAsFile(url), new StandardAnalyzer(), true);

        try
        {
            generateSearchIndexImpl(job, writer, book.getGlobalKeyList());

            job.setProgress(95, Msg.OPTIMIZING.toString());
            writer.optimize();
        }
        finally
        {
            // Always release the writer, even if indexing failed part way
            writer.close();
        }

        // Do not leak a searcher that was opened on the previous index
        closeSearcher();
        searcher = new IndexSearcher(NetUtil.getAsFile(url).getCanonicalPath());
    }

    /**
     * Dig down into a Key indexing as we go. Keys that can have children are
     * recursed into, all others are added to the index as one document
     * holding the key name and the plain text of the Book at that key.
     * Indexing stops as soon as the current thread is found to be
     * interrupted, at every level of the recursion.
     * @param job The progress reporter
     * @param writer The index to add documents to
     * @param key The Key whose members are to be indexed
     * @throws BookException If the Book data can not be read
     * @throws IOException If Lucene fails to add a document
     */
    private void generateSearchIndexImpl(Job job, IndexWriter writer, Key key) throws BookException, IOException
    {
        // Kept outside the loop so that a failed lookup re-reports the last
        // good value
        int percent = 0;

        for (Iterator it = key.iterator(); it.hasNext(); )
        {
            Key subkey = (Key) it.next();
            if (subkey.canHaveChildren())
            {
                generateSearchIndexImpl(job, writer, subkey);
            }
            else
            {
                BookData data = book.getData(subkey);
                Reader reader = new StringReader(data.getPlainText());

                Document doc = new Document();
                doc.add(Field.Text(FIELD_NAME, subkey.getName()));
                doc.add(Field.Text(FIELD_BODY, reader));

                writer.addDocument(doc);

                // report progress
                String name = ""; //$NON-NLS-1$
                Verse verse = KeyUtil.getVerse(subkey);

                try
                {
                    // The last 5% is reserved for the optimize step
                    percent = 95 * verse.getOrdinal() / BibleInfo.versesInBible();
                    name = BibleInfo.getLongBookName(verse.getBook());
                }
                catch (NoSuchVerseException ex)
                {
                    log.error("Failed to get book name from verse: "+verse, ex); //$NON-NLS-1$
                    assert false;
                    name = subkey.getName();
                }

                job.setProgress(percent, Msg.INDEXING.toString(name));

                // This could take a long time ...
                Thread.yield();
            }

            // Checked after both branches: isInterrupted() does not clear the
            // flag, so each enclosing level of the recursion stops as well
            // rather than carrying on with its remaining children.
            if (Thread.currentThread().isInterrupted())
            {
                break;
            }
        }
    }

    /**
     * Open a searcher on the index if there is one, otherwise ask the
     * IndexManager to create the index. Any searcher that is already open
     * (for example the one opened by init) is closed before being replaced.
     * @param lock The activation lock, not used here
     */
    public final void activate(Lock lock)
    {
        if (isIndexed())
        {
            try
            {
                closeSearcher();
            }
            catch (IOException ex)
            {
                log.warn("failed to close previous searcher", ex); //$NON-NLS-1$
            }

            try
            {
                searcher = new IndexSearcher(NetUtil.getAsFile(url).getCanonicalPath());
            }
            catch (IOException ex)
            {
                log.warn("second load failure", ex); //$NON-NLS-1$
            }
        }
        else
        {
            IndexManager.instance().createIndex(this);
        }

        active = true;
    }

    /**
     * Close the searcher, if there is one, and mark ourselves inactive.
     * A failure to close is reported to the user and the searcher reference
     * is dropped regardless.
     * @param lock The activation lock, not used here
     */
    public final void deactivate(Lock lock)
    {
        try
        {
            closeSearcher();
        }
        catch (IOException ex)
        {
            Reporter.informUser(this, ex);
        }

        active = false;
    }

    /**
     * Helper method so we can quickly activate ourselves on access
     */
    protected final void checkActive()
    {
        if (!active)
        {
            Activator.activate(this);
        }
    }

    /**
     * Close the current searcher if there is one. The field is cleared
     * before the close is attempted so that a failed close never leaves a
     * half closed searcher behind for later use.
     * @throws IOException If the searcher fails to close
     */
    private void closeSearcher() throws IOException
    {
        Searcher old = searcher;
        searcher = null;

        if (old != null)
        {
            old.close();
        }
    }

    /**
     * Are we active
     */
    private boolean active = false;

    /**
     * The log stream
     */
    private static final Logger log = Logger.getLogger(LuceneIndex.class);

    /**
     * Are we in the middle of generating an index?
     * NOTE(review): nothing in this class ever assigns this field, so
     * isIndexed() never takes its early exit - confirm whether
     * generateSearchIndex() was meant to set it.
     */
    private boolean generating = false;

    /**
     * The lucene search index directory
     */
    protected static final String DIR_LUCENE = "lucene"; //$NON-NLS-1$

    /**
     * The name of the entry whose presence as a file marks an existing index
     */
    protected static final String DIR_SEGMENTS = "segments"; //$NON-NLS-1$

    /**
     * The Lucene field for the verse name
     */
    protected static final String FIELD_NAME = "name"; //$NON-NLS-1$

    /**
     * The Lucene field for the verse contents
     */
    protected static final String FIELD_BODY = "body"; //$NON-NLS-1$

    /**
     * The Book that we are indexing
     */
    protected Book book;

    /**
     * The location of this index
     */
    private URL url;

    /**
     * The Lucene search engine, null when no index is open
     */
    protected Searcher searcher;
}
--- LuceneSearchEngine.java DELETED ---
Index: Msg.properties
===================================================================
RCS file: /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/search/lucene/Msg.properties,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** Msg.properties 14 Jun 2004 05:27:22 -0000 1.1
--- Msg.properties 29 Sep 2004 22:21:24 -0000 1.2
***************
*** 6,11 ****
# It should have no spaces or other punctuation (e.g. _, -, ', ...)
! LuceneSearchEngine.LuceneInit=Failed to initialise Lucene search engine.
! LuceneSearchEngine.SearchFailed=Search failed.
! LuceneSearchEngine.Indexing=Indexing verse:
! LuceneSearchEngine.Optimizing=Optimizing
--- 6,12 ----
# It should have no spaces or other punctuation (e.g. _, -, ', ...)
! LuceneIndex.LuceneInit=Failed to initialise Lucene search engine.
! LuceneIndex.SearchFailed=Search failed.
! LuceneIndex.Indexing=Creating index. Processing {0}
! LuceneIndex.Optimizing=Optimizing
! LuceneIndex.DeleteFailed=Failed to delete search index
Index: Msg.java
===================================================================
RCS file: /cvs/jsword/jsword/java/jsword/org/crosswire/jsword/book/search/lucene/Msg.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -d -r1.8 -r1.9
*** Msg.java 13 Jun 2004 22:12:32 -0000 1.8
--- Msg.java 29 Sep 2004 22:21:24 -0000 1.9
***************
*** 29,36 ****
class Msg extends MsgBase
{
! static final Msg LUCENE_INIT = new Msg("LuceneSearchEngine.LuceneInit"); //$NON-NLS-1$
! static final Msg SEARCH_FAILED = new Msg("LuceneSearchEngine.SearchFailed"); //$NON-NLS-1$
! static final Msg INDEXING = new Msg("LuceneSearchEngine.Indexing"); //$NON-NLS-1$
! static final Msg OPTIMIZING = new Msg("LuceneSearchEngine.Optimizing"); //$NON-NLS-1$
/**
--- 29,37 ----
class Msg extends MsgBase
{
! static final Msg LUCENE_INIT = new Msg("LuceneIndex.LuceneInit"); //$NON-NLS-1$
! static final Msg SEARCH_FAILED = new Msg("LuceneIndex.SearchFailed"); //$NON-NLS-1$
! static final Msg INDEXING = new Msg("LuceneIndex.Indexing"); //$NON-NLS-1$
! static final Msg OPTIMIZING = new Msg("LuceneIndex.Optimizing"); //$NON-NLS-1$
! static final Msg DELETE_FAILED = new Msg("LuceneIndex.DeleteFailed"); //$NON-NLS-1$
/**
More information about the jsword-svn
mailing list