/*------------------------------------------------------------------------------ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team * * Distributable under the terms of either the Apache License (Version 2.0) or * the GNU Lesser General Public License, as specified in the COPYING file. ------------------------------------------------------------------------------*/ #ifndef _lucene_search_Sort_ #define _lucene_search_Sort_ #if defined(_LUCENE_PRAGMA_ONCE) # pragma once #endif #include "CLucene/index/IndexReader.h" #include "SearchHeader.h" CL_NS_DEF(search) class SortField; //predefine class Sort; /** * Expert: Compares two ScoreDoc objects for sorting. * */ class ScoreDocComparator:LUCENE_BASE { protected: ScoreDocComparator(){} public: virtual ~ScoreDocComparator(); // CL_NS(util)::Comparable** cachedValues; // ScoreDocComparator(CL_NS(util)::Comparable** cachedValues); /** * Compares two ScoreDoc objects and returns a result indicating their * sort order. * @param i First ScoreDoc * @param j Second ScoreDoc * @return -1 if i should come before j
1 if i should come after j
0 if they are equal * @see java.util.Comparator */ virtual int32_t compare (struct ScoreDoc* i, struct ScoreDoc* j) = 0; /** * Returns the value used to sort the given document. The * object returned must implement the java.io.Serializable * interface. This is used by multisearchers to determine how to collate results from their searchers. * @see FieldDoc * @param i Document * @return Serializable object */ virtual CL_NS(util)::Comparable* sortValue (struct ScoreDoc* i) = 0; /** * Returns the type of sort. Should return SortField.SCORE, SortField.DOC, SortField.STRING, SortField.INTEGER, * SortField::FLOAT or SortField.CUSTOM. It is not valid to return SortField.AUTO. * This is used by multisearchers to determine how to collate results from their searchers. * @return One of the constants in SortField. * @see SortField */ virtual int32_t sortType() = 0; /** Special comparator for sorting hits according to computed relevance (document score). */ static ScoreDocComparator* RELEVANCE; /** Special comparator for sorting hits according to index order (document number). */ static ScoreDocComparator* INDEXORDER; }; /** * Expert: returns a comparator for sorting ScoreDocs. * */ class SortComparatorSource:LUCENE_BASE { public: virtual ~SortComparatorSource(){ } /** * return a reference to a string describing the name of the comparator * this is used in the explanation */ virtual TCHAR* getName() = 0; virtual size_t hashCode() = 0; /** * Creates a comparator for the field in the given index. * @param reader Index to create comparator for. * @param fieldname Field to create comparator for. * @return Comparator of ScoreDoc objects. * @throws IOException If an error occurs reading the index. */ virtual ScoreDocComparator* newComparator (CL_NS(index)::IndexReader* reader, const TCHAR* fieldname) = 0; }; /** * Abstract base class for sorting hits returned by a Query. * *

This class should only be used if the other SortField * types (SCORE, DOC, STRING, INT, FLOAT) do not provide an * adequate sorting. It maintains an internal cache of values which * could be quite large. The cache is an array of Comparable, * one for each document in the index. There is a distinct * Comparable for each unique term in the field - if * some documents have the same term in the field, the cache * array will have entries which reference the same Comparable. * */ class SortComparator: public SortComparatorSource { public: virtual ScoreDocComparator* newComparator (CL_NS(index)::IndexReader* reader, const TCHAR* fieldname); SortComparator(); virtual ~SortComparator(); /** * Returns an object which, when sorted according to natural order, * will order the Term values in the correct order. *

For example, if the Terms contained integer values, this method * would return new Integer(termtext). Note that this * might not always be the most efficient implementation - for this * particular example, a better implementation might be to make a * ScoreDocLookupComparator that uses an internal lookup table of int. * @param termtext The textual value of the term. * @return An object representing termtext that sorts * according to the natural order of termtext. * @see Comparable * @see ScoreDocComparator */ virtual CL_NS(util)::Comparable* getComparable (const TCHAR* termtext) = 0; }; /** * Stores information about how to sort documents by terms in an individual * field. Fields must be indexed in order to sort by them. * */ class SortField:LUCENE_BASE { private: const TCHAR* field; int32_t type; // defaults to determining type dynamically //Locale* locale; // defaults to "natural order" (no Locale) bool reverse; // defaults to natural order SortComparatorSource* factory; protected: SortField (const SortField& clone); public: virtual ~SortField(); /** Sort by document score (relevancy). Sort values are Float and higher * values are at the front. * PORTING: this is the same as SCORE in java, it had to be renamed because * SCORE is a system macro on some platforms (AIX). */ LUCENE_STATIC_CONSTANT(int32_t, DOCSCORE=0); /** Sort by document number (index order). Sort values are Integer and lower * values are at the front. */ LUCENE_STATIC_CONSTANT(int32_t, DOC=1); /** Guess type of sort based on field contents. A regular expression is used * to look at the first term indexed for the field and determine if it * represents an integer number, a floating point number, or just arbitrary * string characters. */ LUCENE_STATIC_CONSTANT(int32_t, AUTO=2); /** Sort using term values as Strings. Sort values are String and lower * values are at the front. */ LUCENE_STATIC_CONSTANT(int32_t, STRING=3); /** Sort using term values as encoded Integers. 
Sort values are Integer and * lower values are at the front. */ LUCENE_STATIC_CONSTANT(int32_t, INT=4); /** Sort using term values as encoded Floats. Sort values are Float and * lower values are at the front. */ LUCENE_STATIC_CONSTANT(int32_t, FLOAT=5); /** Sort using a custom Comparator. Sort values are any Comparable and * sorting is done according to natural order. */ LUCENE_STATIC_CONSTANT(int32_t, CUSTOM=9); // IMPLEMENTATION NOTE: the FieldCache.STRING_INDEX is in the same "namespace" // as the above static int values. Any new values must not have the same value // as FieldCache.STRING_INDEX. /** Represents sorting by document score (relevancy). */ static SortField* FIELD_SCORE; /** Represents sorting by document number (index order). */ static SortField* FIELD_DOC; /** Creates a sort by terms in the given field where the type of term value * is determined dynamically ({@link #AUTO AUTO}). * @param field Name of field to sort by, cannot be null. */ SortField (const TCHAR* field); //SortField (const TCHAR* field, bool reverse); //todo: we cannot make reverse use default field of =false. //because bool and int are the same type in c, overloading is not possible /** Creates a sort, possibly in reverse, by terms in the given field with the * type of term values explicitly given. * @param field Name of field to sort by. Can be null if * type is SCORE or DOC. * @param type Type of values in the terms. * @param reverse True if natural order should be reversed (default=false). */ SortField (const TCHAR* field, int32_t type, bool reverse); /* SortField (TCHAR* field, Locale* locale) { SortField (TCHAR* field, Locale* locale, bool reverse);*/ /** Creates a sort, possibly in reverse, with a custom comparison function. * @param field Name of field to sort by; cannot be null. * @param comparator Returns a comparator for sorting hits. * @param reverse True if natural order should be reversed (default=false). 
*/ SortField (const TCHAR* field, SortComparatorSource* comparator, bool reverse=false); /** Returns the name of the field. Could return null * if the sort is by SCORE or DOC. * @return Name of field, possibly null. */ const TCHAR* getField() const { return field; } SortField* clone() const; /** Returns the type of contents in the field. * @return One of the constants SCORE, DOC, AUTO, STRING, INT or FLOAT. */ int32_t getType() const { return type; } /** Returns the Locale by which term values are interpreted. * May return null if no Locale was specified. * @return Locale, or null. */ /*Locale getLocale() { return locale; }*/ /** Returns whether the sort should be reversed. * @return True if natural order should be reversed. */ bool getReverse() const { return reverse; } SortComparatorSource* getFactory() const { return factory; } TCHAR* toString() const; }; /** * Encapsulates sort criteria for returned hits. * *

The fields used to determine sort order must be carefully chosen. * Documents must contain a single term in such a field, * and the value of the term should indicate the document's relative position in * a given sort order. The field must be indexed, but should not be tokenized, * and does not need to be stored (unless you happen to want it back with the * rest of your document data). In other words: * *

document.add (new Field ("byNumber", Integer.toString(x), false, true, false)); *
* *

Valid Types of Values

* *

There are three possible kinds of term values which may be put into * sorting fields: Integers, Floats, or Strings. Unless * {@link SortField SortField} objects are specified, the type of value * in the field is determined by parsing the first term in the field. * *

Integer term values should contain only digits and an optional
 * preceding negative sign. Values must be base 10 and in the range
 * Integer.MIN_VALUE to Integer.MAX_VALUE inclusive.
 * Documents which should appear first in the sort
 * should have low value integers, later documents high values
 * (i.e. the documents should be numbered 1..n where
 * 1 is the first and n the last).
 *
 *

Float term values should conform to values accepted by * {@link Float Float.valueOf(String)} (except that NaN * and Infinity are not supported). * Documents which should appear first in the sort * should have low values, later documents high values. * *

String term values can contain any valid String, but should * not be tokenized. The values are sorted according to their * {@link Comparable natural order}. Note that using this type * of term value has higher memory requirements than the other * two types. * *

Object Reuse

* *

One of these objects can be * used multiple times and the sort order changed between usages. * *

This class is thread safe. * *

Memory Usage

* *

Sorting uses caches of term values maintained by the
 * internal HitQueue(s). The cache is static and contains an integer
 * or float array of length IndexReader.maxDoc() for each field
 * name for which a sort is performed. In other words, the size of the
 * cache in bytes is:
 *
 *

4 * IndexReader.maxDoc() * (# of different fields actually used to sort) * *

For String fields, the cache is larger: in addition to the * above array, the value of every term in the field is kept in memory. * If there are many unique terms in the field, this could * be quite large. * *

Note that the size of the cache is not affected by how many * fields are in the index and might be used to sort - only by * the ones actually used to sort a result set. * *

The cache is cleared each time a new IndexReader is * passed in, or if the value returned by maxDoc() * changes for the current IndexReader. This class is not set up to * be able to efficiently sort hits from more than one index * simultaneously. * */ class Sort:LUCENE_BASE { // internal representation of the sort criteria SortField** fields; void clear(); public: ~Sort(); /** Represents sorting by computed relevance. Using this sort criteria * returns the same results as calling {@link Searcher#search(Query) Searcher#search()} * without a sort criteria, only with slightly more overhead. */ static Sort* RELEVANCE; /** Represents sorting by index order. */ static Sort* INDEXORDER; /** Sorts by computed relevance. This is the same sort criteria as * calling {@link Searcher#search(Query) Searcher#search()} without a sort criteria, only with * slightly more overhead. */ Sort(); /** Sorts possibly in reverse by the terms in field then by * index order (document number). The type of value in field is determined * automatically. * @see SortField#AUTO */ Sort (const TCHAR* field, bool reverse=false); /** Sorts in succession by the terms in each field. * The type of value in field is determined * automatically. * @see SortField#AUTO */ Sort (const TCHAR** fields); /** Sorts by the criteria in the given SortField. */ Sort (SortField* field); /** Sorts in succession by the criteria in each SortField. */ Sort (SortField** fields); /** Sets the sort to the terms in field possibly in reverse, * then by index order (document number). */ void setSort (const TCHAR* field, bool reverse=false); /** Sets the sort to the terms in each field in succession. */ void setSort (const TCHAR** fieldnames); /** Sets the sort to the given criteria. */ void setSort (SortField* field); /** Sets the sort to the given criteria in succession. */ void setSort (SortField** fields); TCHAR* toString() const; /** * Representation of the sort criteria. 
* @return a pointer to the of SortField array used in this sort criteria */ SortField** getSort() const{ return fields; } }; CL_NS_END #endif