/*------------------------------------------------------------------------------ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team * * Distributable under the terms of either the Apache License (Version 2.0) or * the GNU Lesser General Public License, as specified in the COPYING file. ------------------------------------------------------------------------------*/ #ifndef _lucene_search_SearchHeader_ #define _lucene_search_SearchHeader_ #if defined(_LUCENE_PRAGMA_ONCE) # pragma once #endif #include "CLucene/index/IndexReader.h" #include "CLucene/index/Term.h" #include "Filter.h" #include "CLucene/document/Document.h" #include "Sort.h" #include "CLucene/util/VoidList.h" #include "Explanation.h" #include "Similarity.h" CL_NS_DEF(search) //predefine classes class Scorer; class Query; class Hits; class Sort; class FieldDoc; class TopFieldDocs; /** Expert: Returned by low-level search implementations. * @see TopDocs */ struct ScoreDoc { /** Expert: A hit document's number. * @see Searcher#doc(int32_t) */ int32_t doc; /** Expert: The score of this document for the query. */ float_t score; }; /** Expert: Returned by low-level search implementations. * @see Searcher#search(Query,Filter,int32_t) */ class TopDocs:LUCENE_BASE { public: /** Expert: The total number of hits for the query. * @see Hits#length() */ int32_t totalHits; /** Expert: The top hits for the query. */ ScoreDoc* scoreDocs; int32_t scoreDocsLength; /** Expert: Constructs a TopDocs. TopDocs takes ownership of the ScoreDoc array*/ TopDocs(const int32_t th, ScoreDoc* sds, int32_t scoreDocsLength); ~TopDocs(); }; // Lower-level search API. // @see Searcher#search(Query,HitCollector) class HitCollector: LUCENE_BASE { public: /** Called once for every non-zero scoring document, with the document number * and its score. * *
If, for example, an application wished to collect all of the hits for a * query in a BitSet, then it might:
* Searcher searcher = new IndexSearcher(indexReader); * final BitSet bits = new BitSet(indexReader.maxDoc()); * searcher.search(query, new HitCollector() { * public void collect(int32_t doc, float score) { * bits.set(doc); * } * }); ** *
Note: This is called in an inner search loop. For good search * performance, implementations of this method should not call * {@link Searcher#doc(int32_t)} or * {@link IndexReader#document(int32_t)} on every * document number encountered. Doing so can slow searches by an order * of magnitude or more. *
Note: The score
passed to this method is a raw score.
* In other words, the score will not necessarily be a float whose value is
* between 0 and 1.
*/
virtual void collect(const int32_t doc, const float_t score) = 0;
virtual ~HitCollector(){}
};
/** Expert: Calculates query weights and builds query scorers.
 *
 * <p>A Weight is constructed by a query, given a Searcher ({@link
 * Query#_createWeight(Searcher)}). {@link #sumOfSquaredWeights()} is then
 * called on the top-level query to compute the query normalization factor
 * ({@link Similarity#queryNorm(float_t)}). That factor is passed on to
 * {@link #normalize(float_t)}; at that point the weighting is complete and a
 * scorer may be constructed by calling {@link #scorer(IndexReader)}.
 */
class Weight: LUCENE_BASE {
public:
    virtual ~Weight() {}

    /** Returns the query that this weight concerns. */
    virtual Query* getQuery() = 0;

    /** Returns the weight for this query. */
    virtual float_t getValue() = 0;

    /** Returns the sum of squared weights of the contained query clauses. */
    virtual float_t sumOfSquaredWeights() = 0;

    /** Assigns the query normalization factor to this weight.
     * @param norm the normalization factor
     */
    virtual void normalize(float_t norm) = 0;

    /** Constructs a scorer for this weight.
     * @param reader the index reader to build the scorer for
     */
    virtual Scorer* scorer(CL_NS(index)::IndexReader* reader) = 0;

    /** Explains the score computation for the named document.
     * The function returns nothing, so the result is presumably delivered
     * through <code>ret</code> - confirm against an implementation.
     */
    virtual void explain(CL_NS(index)::IndexReader* reader, int32_t doc, Explanation* ret) = 0;

    /** Returns the result of STRDUP_TtoT on the literal "Weight".
     * NOTE(review): presumably a fresh copy the caller has to release - confirm
     * against the STRDUP_TtoT definition.
     */
    virtual TCHAR* toString() {
        TCHAR* const label = STRDUP_TtoT(_T("Weight"));
        return label;
    }
};
// One search hit: a score, a document number, a Document pointer and the
// prev/next links that the field comments describe as "in doubly-linked cache"
// (presumably the cache kept by Hits - confirm).
class HitDoc:LUCENE_BASE {
public:
// Score of this hit.
float_t score;
// Document number of this hit.
int32_t id;
// Document object for this hit. NOTE(review): who allocates and frees it is not
// visible in this header - confirm against ~HitDoc() and Hits.
CL_NS(document)::Document* doc;
HitDoc* next; // in doubly-linked cache
HitDoc* prev; // in doubly-linked cache
// s is a score and i an id; presumably they initialise `score` and `id`
// (the definition is not in this header).
HitDoc(const float_t s, const int32_t i);
~HitDoc();
};
// A ranked list of documents, used to hold search results.
class Hits:LUCENE_BASE {
private:
Query* query;
Searcher* searcher;
Filter* filter;
const Sort* sort;
size_t _length; // the total number of hits
CL_NS(util)::CLVector Documents are cached, so that repeated requests for the same element may
return the same Document object.
*
* @memory Memory belongs to the hits object. Don't delete the return value.
*/
CL_NS(document)::Document& doc(const int32_t n);
/** Returns the id for the nth document in this set. */
int32_t id (const int32_t n);
/** Returns the score for the nth document in this set. */
float_t score(const int32_t n);
private:
// Tries to add new documents to hitDocs.
// Ensures that the hit numbered <code>_min</code> has been retrieved.
/** The interface for search implementations.
*
* <p>Implementations provide search over a single index, over multiple
* indices, and over indices on remote servers.
*/
// Pure-virtual search interface: every member function declared here, other
// than the destructor, is "= 0".
// NOTE(review): docFreq(), maxDoc(), doc() and rewrite() are declared as pure
// virtuals near the end of this file (after the include guard's #endif), next to
// the comment text that was missing from this class; they look displaced from
// here - confirm and restore.
class Searchable: LUCENE_BASE {
public:
virtual ~Searchable(){
}
/** Lower-level search API.
*
* <p>{@link HitCollector#collect(int32_t,float_t)} is called for every non-zero
* scoring document.
*
* <p>Applications should only use this if they need <i>all</i> of the
* matching documents. The high-level search API ({@link
* Searcher#search(Query*)}) is usually more efficient, as it skips
* non-high-scoring hits.
*
* @param query to match documents
* @param filter if non-null, a bitset used to eliminate some documents
* @param results to receive hits
*/
virtual void _search(Query* query, Filter* filter, HitCollector* results) = 0;
/** Frees resources associated with this Searcher.
* Be careful not to call this method while you are still using objects
* like {@link Hits}.
*/
virtual void close() = 0;
/** Expert: Low-level search implementation. Finds the top <code>n</code>
* hits for <code>query</code>, applying <code>filter</code> if non-null.
*
* <p>Called by {@link Hits}.
*
* <p>Applications should usually call {@link Searcher#search(Query*)} or
* {@link Searcher#search(Query*,Filter*)} instead.
*/
virtual TopDocs* _search(Query* query, Filter* filter, const int32_t n) = 0;
/** Returns an Explanation that describes how <code>doc</code> scored against
* <code>query</code>. The function returns nothing, so the explanation is
* presumably delivered through <code>ret</code> - confirm against an
* implementation.
*
* <p>This is intended to be used in developing Similarity implementations,
* and, for good performance, should not be displayed with every hit.
* Computing an explanation is as expensive as executing the query over the
* entire index.
*/
virtual void explain(Query* query, int32_t doc, Explanation* ret) = 0;
/** Expert: Low-level search implementation with arbitrary sorting. Finds
* the top <code>n</code> hits for <code>query</code>, applying
* <code>filter</code> if non-null, and sorting the hits by the criteria in
* <code>sort</code>.
*
* <p>Applications should usually call {@link
* Searcher#search(Query,Filter,Sort)} instead.
*/
virtual TopFieldDocs* _search(Query* query, Filter* filter, const int32_t n, const Sort* sort) = 0;
};
/** An abstract base class for search implementations.
* Implements some common utility methods.
*/
class Searcher:public Searchable {
private:
/** The Similarity implementation used by this searcher. */
Similarity* similarity;
public:
Searcher(){
similarity = Similarity::getDefault();
}
virtual ~Searcher(){
}
// Returns the documents matching {@link HitCollector#collect(int32_t ,float_t)} is called for every non-zero
* scoring document.
*
* Applications should only use this if they need all of the
* matching documents. The high-level search API ({@link
* Searcher#search(Query*)}) is usually more efficient, as it skips
* non-high-scoring hits.
* Note: The This defaults to the current value of {@link Similarity#getDefault()}.
*/
Similarity* getSimilarity(){
return this->similarity;
}
};
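/** The abstract base class for queries.
* (The lists that followed "Instantiable subclasses are:" and "A parser for
* queries is contained in:" are missing from this copy of the file.)
*/
/** Expert: called when re-writing queries under MultiSearcher.
*
* <p>Only implemented by derived queries, with no
* {@link #_createWeight(Searcher)} implementation.
*/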
virtual Query* combine(Query** queries);
/** Expert: merges the clauses of a set of BooleanQuery's into a single
* BooleanQuery.
*
* A utility for use by {@link #combine(Query[])} implementations.
*/
static Query* mergeBooleanQueries(Query** queries);
/** Expert: Returns the Similarity implementation to be used for this query.
* Subclasses may override this method to specify their own Similarity
* implementation, perhaps one that delegates through that of the Searcher.
* By default the Searcher's Similarity implementation is returned.*/
Similarity* getSimilarity(Searcher* searcher);
/** Returns a clone of this query. */
virtual Query* clone() const = 0;
virtual const TCHAR* getQueryName() const = 0;
bool instanceOf(const TCHAR* other) const;
/** Prints a query to a string, with <code>field</code> as the default field
* for terms. <p>The representation used is one that is readable by
* {@link queryParser.QueryParser QueryParser}
* (although, if the query was created by the parser, the printed
* representation may not be exactly what was parsed).
*/
virtual TCHAR* toString(const TCHAR* field) const = 0;
virtual bool equals(Query* other) const = 0;
virtual size_t hashCode() const = 0;
/** Prints a query to a string. */
TCHAR* toString() const;
/** Expert: Constructs an appropriate Weight implementation for this query.
*
* Only implemented by primitive queries, which re-write to themselves.
* This is an Internal function
*/
virtual Weight* _createWeight(Searcher* searcher);
};
CL_NS_END
#endif
// Ensures that the hit numbered <code>_min</code> has been retrieved.
void getMoreDocs(const size_t _min);
HitDoc* getHitDoc(const size_t n);
void addToFront(HitDoc* hitDoc);
void remove(const HitDoc* hitDoc);
};
/** The interface for search implementations. */
/** Expert: Returns the number of documents containing <code>term</code>.
* Called by search code to compute term weights.
* @see IndexReader#docFreq(Term).
*/
virtual int32_t docFreq(const CL_NS(index)::Term* term) const = 0;
/** Expert: Returns one greater than the largest possible document number.
* Called by search code to compute term weights.
* @see IndexReader#maxDoc().
*/
virtual int32_t maxDoc() const = 0;
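/* Displaced text (apparently documents _search(query, filter, n)):
* Expert: Low-level search implementation. Finds the top <code>n</code>
* hits for <code>query</code>, applying <code>filter</code> if non-null.
*/
/** Expert: Returns the stored fields of document <code>i</code>.
* Called by {@link HitCollector} implementations.
* @see IndexReader#document(int32_t).
*/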
virtual bool doc(int32_t i, CL_NS(document)::Document* d) = 0;
_CL_DEPRECATED( doc(i, document) ) CL_NS(document)::Document* doc(const int32_t i);
/** Expert: called to re-write queries into primitive queries. */
virtual Query* rewrite(Query* query) = 0;
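/* Displaced text (apparently documents explain()):
* Returns an Explanation that describes how <code>doc</code> scored against
* <code>query</code>.
*/
/* Displaced text (apparently documents _search(query, filter, n, sort)):
* finds the top <code>n</code> hits for <code>query</code>, applying
* <code>filter</code> if non-null, and sorting the hits by the criteria in
* <code>sort</code>.
*/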
* query
.
Hits* search(Query* query) {
return search(query, (Filter*)NULL );
}
// Returns the documents matching query
and
// filter
.
Hits* search(Query* query, Filter* filter) {
return _CLNEW Hits(this, query, filter);
}
/** Returns documents matching <code>query</code> sorted by
 * <code>sort</code>.
 * No filter is used: NULL is passed in the filter position.
 */
Hits* search(Query* query, const Sort* sort){
    Hits* const hits = _CLNEW Hits(this, query, NULL, sort);
    return hits;
}
/** Returns documents matching <code>query</code> and <code>filter</code>,
 * sorted by <code>sort</code>.
 */
Hits* search(Query* query, Filter* filter, const Sort* sort){
    Hits* const hits = _CLNEW Hits(this, query, filter, sort);
    return hits;
}
/** Lower-level search API.
 *
 * <p>{@link HitCollector#collect(int32_t ,float_t)} is called for every non-zero
 * scoring document.
 *
 * <p>Applications should only use this if they need <i>all</i> of the
 * matching documents. The high-level search API ({@link
 * Searcher#search(Query*)}) is usually more efficient, as it skips
 * non-high-scoring hits.
 * <p>Note: The <code>score</code> passed to this method is a raw score.
 * In other words, the score will not necessarily be a float whose value is
 * between 0 and 1.
 *
 * @param query to match documents
 * @param results to receive hits
 */
void _search(Query* query, HitCollector* results) {
    // Call through a Searchable* rather than with the qualified name
    // Searchable::_search(...). A qualified call suppresses virtual dispatch and
    // names the pure-virtual base function itself, so the searcher's actual
    // implementation was never reached. Going through the base pointer still
    // looks the name up in Searchable (where the three-argument overloads are
    // declared) but dispatches virtually.
    static_cast<Searchable*>(this)->_search(query, NULL, results);
}
/** Expert: Set the Similarity implementation used by this Searcher.
*
* Only the pointer is stored; nothing in this function copies the object or
* releases the previously stored one.
*
* @param similarity the implementation to use from now on
* @see Similarity#setDefault(Similarity)
*/
void setSimilarity(Similarity* similarity) {
this->similarity = similarity;
}
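/* Displaced text (apparently documents Searcher::getSimilarity()):
* Expert: Return the Similarity implementation used by this Searcher.
*/
/** The abstract base class for queries. */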
class Query :LUCENE_BASE {
private:
// query boost factor
float_t boost;
protected:
Query(const Query& clone);
public:
Query();
virtual ~Query();
/** Sets the boost for this query clause to <code>b</code>. Documents
* matching this clause will (in addition to the normal weightings) have
* their score multiplied by <code>b</code>.
*/
void setBoost(float_t b);
/** Gets the boost for this clause. Documents matching
* this clause will (in addition to the normal weightings) have their score
* multiplied by <code>b</code>. The boost is 1.0 by default.
*/
float_t getBoost() const;
/** Expert: Constructs and initializes a Weight for a top-level query. */
Weight* weight(Searcher* searcher);
/** Expert: called to re-write queries into primitive queries. */
virtual Query* rewrite(CL_NS(index)::IndexReader* reader);
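/* Displaced text (apparently documents combine()):
* Expert: called when re-writing queries under MultiSearcher.
*/
/* Displaced text (apparently the continuation of the toString(field) comment):
* ... with <code>field</code> as the default field for terms.
*/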