/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/StdHeader.h"
#include "TermInfosWriter.h"

#include "CLucene/store/Directory.h"
#include "CLucene/util/Misc.h"
#include "FieldInfos.h"
#include "Term.h"
#include "TermInfo.h"
#include "IndexWriter.h"

CL_NS_USE(util)
CL_NS_USE(store)
CL_NS_DEF(index)

TermInfosWriter::TermInfosWriter(Directory* directory, const char* segment, FieldInfos* fis, int32_t interval):
    fieldInfos(fis){
//Func - Constructor for the main (".tis") writer. It also creates the
//       companion index (".tii") writer and links the two through 'other'.
//Pre  - directory contains a valid reference to a Directory
//       segment != NULL
//       fis contains a valid reference to a FieldInfos
//Post - The instance and its companion index writer have been created

    CND_PRECONDITION(segment != NULL, "segment is NULL");

    //Initialize this instance as the non-index writer
    initialise(directory,segment,interval, false);

    //Create the index writer; it is closed and deleted by close() of this instance
    other = _CLNEW TermInfosWriter(directory, segment,fieldInfos, interval, true);

    CND_CONDITION(other != NULL, "other is NULL");

    //Back-link: the index writer reads this writer's file pointer in add()
    other->other = this;
}

TermInfosWriter::TermInfosWriter(Directory* directory, const char* segment, FieldInfos* fis, int32_t interval, bool isIndex):
    fieldInfos(fis){
//Func - Constructor that creates a single writer without a companion;
//       'other' is left NULL by initialise()
//Pre  - directory contains a valid reference to a Directory
//       segment != NULL
//       fis contains a valid reference to a FieldInfos
//       isIndex is true or false
//Post - The instance has been created

    CND_PRECONDITION(segment != NULL, "segment is NULL");
    initialise(directory,segment,interval,isIndex);
}

void TermInfosWriter::initialise(Directory* directory, const char* segment, int32_t interval, bool IsIndex){
//Func - Helps the constructors to initialize the instance: resets the state,
//       opens the output file and writes the file header
//Pre  - directory contains a valid reference to a Directory
//       segment != NULL
//Post - The instance has been initialized and 'other' is NULL

    lastTerm = _CLNEW Term;

    CND_CONDITION(lastTerm != NULL, "Could not allocate memory for lastTerm");

    lastTi = _CLNEW TermInfo();

    CND_CONDITION(lastTi != NULL, "Could not allocate memory for lastTi");

    lastIndexPointer = 0;
    size             = 0;
    isIndex          = IsIndex;
    indexInterval    = interval;
    skipInterval     = LUCENE_DEFAULT_TERMDOCS_SKIP_INTERVAL;

    //The index writer uses the ".tii" extension, the main writer ".tis"
    const char* buf = Misc::segmentname(segment, (isIndex ? ".tii" : ".tis"));
    output = directory->createOutput( buf );
    _CLDELETE_CaARRAY(buf);

    output->writeInt(FORMAT);       // write format
    output->writeLong(0);           // leave space for size, filled in by close()
    output->writeInt(indexInterval);// write indexInterval
    output->writeInt(skipInterval); // write skipInterval

    //Set other to NULL by default; the public constructor overwrites it
    other = NULL;
}

TermInfosWriter::~TermInfosWriter(){
//Func - Destructor
//Pre  - true
//Post - The instance has been destroyed

    close();
}

void TermInfosWriter::add(Term* term, const TermInfo* ti) {
//Func - Writes a Term and TermInfo to the output stream
//Pre  - Term must be lexicographically greater than all previous Terms added
//       (only checked when this is not the index writer).
//       Pointers of TermInfo ti (freqPointer and proxPointer) must not be
//       smaller than those of the previously added TermInfo.
//Post - term and ti have been written, lastTi equals ti and size has been
//       incremented

    CND_PRECONDITION(isIndex || term->compareTo(lastTerm) > 0,"term out of order");
    CND_PRECONDITION(ti->freqPointer >= lastTi->freqPointer,"freqPointer out of order");
    CND_PRECONDITION(ti->proxPointer >= lastTi->proxPointer,"proxPointer out of order");

    if (!isIndex && size % indexInterval == 0){
        //add an index term: the PREVIOUS term goes to the index writer.
        //On the first call (size == 0) that is the Term/TermInfo created in initialise().
        other->add(lastTerm, lastTi);
    }

    //write term
    writeTerm(term);
    //write doc freq
    output->writeVInt(ti->docFreq);
    //write pointers as deltas against the previous entry
    output->writeVLong(ti->freqPointer - lastTi->freqPointer);
    output->writeVLong(ti->proxPointer - lastTi->proxPointer);
    //the skip offset is only written when docFreq reaches skipInterval
    if (ti->docFreq >= skipInterval) {
        output->writeVInt(ti->skipOffset);
    }

    if (isIndex){
        //for the index writer 'other' is the main writer: store the delta of
        //its current file pointer against the one recorded last time
        output->writeVLong(other->output->getFilePointer() - lastIndexPointer);
        lastIndexPointer = other->output->getFilePointer(); // write pointer
    }

    lastTi->set(ti);
    size++;
}

void TermInfosWriter::close() {
//Func - Closes the TermInfosWriter
//Pre  - true
//Post - The TermInfosWriter has been closed. Nothing is done when output is
//       already NULL.
//       NOTE(review): a repeated call is only a no-op if _CLDELETE resets
//       the pointer to NULL - confirm against the macro definition.

    if (output){
        //write size at start
        output->seek(4);          // write size after format
        output->writeLong(size);
        output->close();
        _CLDELETE(output);

        //only the main writer owns its companion; the index writer's 'other'
        //points back at the main writer and must not be deleted here
        if (!isIndex){
            if(other){
                other->close();
                _CLDELETE( other );
            }
        }
        _CLDECDELETE(lastTerm);
        _CLDELETE(lastTi);
    }
}

void TermInfosWriter::writeTerm(Term* term) {
//Func - Writes term to the output stream as a delta against lastTerm and
//       makes term the new lastTerm
//Pre  - term contains a valid reference to a Term
//Post - The shared prefix length, the length and characters of the remaining
//       text and the field number have been written

    int32_t start = Misc::stringDifference(lastTerm->text(),lastTerm->textLength(),
        term->text(),term->textLength());
    int32_t length = term->textLength() - start;

    output->writeVInt(start);                        // write shared prefix length
    output->writeVInt(length);                       // write delta length
    output->writeChars(term->text(), start, length); // write delta chars

    int32_t fieldnum = fieldInfos->fieldNumber(term->field());
    //-1 is accepted as lower bound (presumably "field not found" - confirm in FieldInfos)
    CND_PRECONDITION(fieldnum>=-1&&fieldnum<fieldInfos->size(),"Fieldnum is out of range");
    output->writeVInt(fieldnum);                     // write field num

    if ( lastTerm->__cl_refcount == 1 ){
        //reference count is 1: overwrite the existing Term in place
        lastTerm->set(term,term->text());
    }else{
        //otherwise release lastTerm and point at term through _CL_POINTER
        //(presumably taking a counted reference - confirm in macro definition)
        _CLDECDELETE(lastTerm);
        lastTerm = _CL_POINTER(term);
    }
}

CL_NS_END