/*------------------------------------------------------------------------------ * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team * * Distributable under the terms of either the Apache License (Version 2.0) or * the GNU Lesser General Public License, as specified in the COPYING file. ------------------------------------------------------------------------------*/ #include "CLucene/StdHeader.h" #include "TermScorer.h" #include "CLucene/index/Terms.h" #include "TermQuery.h" CL_NS_USE(index) CL_NS_DEF(search) //TermScorer takes TermDocs and delets it when TermScorer is cleaned up TermScorer::TermScorer(Weight* w, CL_NS(index)::TermDocs* td, Similarity* similarity,uint8_t* _norms): Scorer(similarity), termDocs(td), norms(_norms), weight(w), weightValue(w->getValue()), _doc(0), pointer(0), pointerMax(0) { memset(docs,0,32*sizeof(int32_t)); memset(freqs,0,32*sizeof(int32_t)); for (int32_t i = 0; i < LUCENE_SCORE_CACHE_SIZE; i++) scoreCache[i] = getSimilarity()->tf(i) * weightValue; } TermScorer::~TermScorer(){ _CLDELETE(termDocs); } bool TermScorer::next(){ pointer++; if (pointer >= pointerMax) { pointerMax = termDocs->read(docs, freqs, 32); // refill buffer if (pointerMax != 0) { pointer = 0; } else { termDocs->close(); // close stream _doc = LUCENE_INT32_MAX_SHOULDBE; // set to sentinel value return false; } } _doc = docs[pointer]; return true; } bool TermScorer::skipTo(int32_t target) { // first scan in cache for (pointer++; pointer < pointerMax; pointer++) { if (docs[pointer] >= target) { _doc = docs[pointer]; return true; } } // not found in cache, seek underlying stream bool result = termDocs->skipTo(target); if (result) { pointerMax = 1; pointer = 0; docs[pointer] = _doc = termDocs->doc(); freqs[pointer] = termDocs->freq(); } else { _doc = LUCENE_INT32_MAX_SHOULDBE; } return result; } void TermScorer::explain(int32_t doc, Explanation* tfExplanation) { TermQuery* query = (TermQuery*)weight->getQuery(); int32_t tf = 0; while (pointer < pointerMax) { if (docs[pointer] == doc) tf = freqs[pointer]; pointer++; } if (tf == 0) { while (termDocs->next()) { if (termDocs->doc() == doc) { tf = termDocs->freq(); } } } termDocs->close(); tfExplanation->setValue(getSimilarity()->tf(tf)); TCHAR buf[LUCENE_SEARCH_EXPLANATION_DESC_LEN+1]; TCHAR* termToString = query->getTerm(false)->toString(); _sntprintf(buf,LUCENE_SEARCH_EXPLANATION_DESC_LEN,_T("tf(termFreq(%s)=%d)"), termToString, tf); _CLDELETE_CARRAY(termToString); tfExplanation->setDescription(buf); } TCHAR* TermScorer::toString() { TCHAR* wb = weight->toString(); int32_t rl = _tcslen(wb) + 9; //9=_tcslen("scorer(" ")") + 1 TCHAR* ret = _CL_NEWARRAY(TCHAR,rl); _sntprintf(ret,rl,_T("scorer(%s)"), wb); _CLDELETE_ARRAY(wb); return ret; } float_t TermScorer::score(){ int32_t f = freqs[pointer]; float_t raw = // compute tf(f)*weight f < LUCENE_SCORE_CACHE_SIZE // check cache ? scoreCache[f] // cache hit : getSimilarity()->tf(f) * weightValue; // cache miss return raw * Similarity::decodeNorm(norms[_doc]); // normalize for field } CL_NS_END