[bt-devel] Strongs search infinite loop fix
Martin Gruner
mg.pub at gmx.net
Mon Feb 11 14:02:44 MST 2008
Jim,
you now have SVN access. Sorry for the inconvenience.
Your contributions are welcome. Please commit the fix yourself.
mg
Am Montag, 11. Februar 2008 schrieb jdc:
> Martin Gruner or Eeli Kaikkonen
>
> I have fixed the infinite loop problem in the strongs search. It is
> actually in the strongs portion of
> CSearchResultArea::highlightSearchedText(). I have attached the file
> that contains the fix.
> csearchdialogareas.cpp.gz (single gzipped file of csearchdialogareas.cpp).
>
> In case the attachment does not get through, the single function in the
> above file is at the end of this e-mail.
>
> At one time, I had developer access to the CVS repository. It may
> be that I am not using SVN properly, but this is the error I get:
>
> Username: jim-campbell
>
> svn: Commit failed (details follow):
> svn: MKACTIVITY of
> '/svnroot/bibletime/!svn/act/db6237d4-d85e-11dc-8779-79e0e7d1eb57': 403
> Forbidden (https://bibletime.svn.sourceforge.net)
>
>
>
> I don't have a lot of time to help but I would still like to contribute
> as often as I can.
>
> Thanks
> Jim
>
> sourceforge username jim-campbell
>
>
>
>
> /**
>  * Returns a copy of content in which the hits for searchedText are
>  * highlighted with a yellow background.
>  *
>  * Two passes are made over the text, both starting at the "<body" tag
>  * (or at the beginning of the text when there is no such tag):
>  *  1. For every whitespace separated token of searchedText containing
>  *     "strong:", a style attribute is inserted in front of each
>  *     "lemma=" attribute whose quoted value contains that number.
>  *  2. For every word returned by QueryParser(searchedText), each match
>  *     that CToolClass::inHTMLTag() does not report as being inside a
>  *     tag is wrapped in a highlighting <span>. A '*' in a word matches
>  *     any run of non-whitespace characters.
>  *
>  * Matching is always case insensitive.
>  *
>  * @param content      the HTML text to highlight; it is not modified
>  * @param searchedText the search expression entered by the user
>  * @return the highlighted copy of content
>  */
> const QString CSearchResultArea::highlightSearchedText(
>         const QString& content,
>         const QString& searchedText/*, const int searchFlags*/) {
>     QString ret = content;
>
>     //const bool cs = (searchFlags & CSwordModuleSearch::caseSensitive);
>     const Qt::CaseSensitivity cs = Qt::CaseInsensitive;
>
>     // Highlighting constants -
>     // TODO: We need to make the highlight color configurable.
>     const QString rep1("<span style=\"background-color:#FFFF66;\">");
>     const QString rep2("</span>");
>     const QString rep3("style=\"background-color:#FFFF66;\" ");
>     const QString strongPrefix("strong:");
>     const QString lemmaAttr("lemma=");
>
>     // QString::indexOf() and QRegExp::indexIn() interpret a negative
>     // start offset as "count from the end of the string", so a missing
>     // <body> tag must not be passed on as -1. Nothing is ever inserted
>     // in front of this position, so it stays valid for both passes.
>     const int bodyStart = qMax(0, ret.indexOf("<body"));
>
>     //------------------------------------------------------------------
>     // Pass 1: find the strongs search lemmas and highlight them.
>     //------------------------------------------------------------------
>     // TODO: find all possible separators; some possibilities are
>     // "\\s|\\|", "\\s|\\+", or "\\s|\\|\\+"
>     const QStringList tokens = searchedText.split(QRegExp("\\s"));
>     foreach (const QString& token, tokens) {
>         const int prefixPos = token.indexOf(strongPrefix);
>         if (prefixPos == -1)
>             continue;
>
>         // The strongs number is everything following "strong:".
>         const QString sNumber =
>             token.mid(prefixPos + strongPrefix.length());
>         if (sNumber.isEmpty())
>             continue; // a bare "strong:" would match every lemma
>
>         int strongIndex = bodyStart;
>         while ((strongIndex = ret.indexOf(lemmaAttr, strongIndex, cs))
>                 != -1) {
>             // Extract the quoted attribute value after "lemma=".
>             const int open = ret.indexOf('"', strongIndex);
>             const int close =
>                 (open == -1) ? -1 : ret.indexOf('"', open + 1);
>             if (close == -1)
>                 break; // unterminated attribute, nothing left to scan
>             const QString lemmaText =
>                 ret.mid(open + 1, close - open - 1);
>
>             // A lemma may hold several numbers, e.g. G3218|G300, so a
>             // partial match against the attribute value is used.
>             if (lemmaText.contains(sNumber)) {
>                 // Highlight by inserting rep3 just before "lemma=".
>                 ret.insert(strongIndex, rep3);
>                 strongIndex += rep3.length();
>             }
>             strongIndex += lemmaAttr.length();
>         }
>     }
>
>     //------------------------------------------------------------------
>     // Pass 2: now that the strong: stuff is out of the way continue
>     // with the other search words.
>     //------------------------------------------------------------------
>     // The lucene query parser could not be used here (an earlier
>     // attempt with QueryParser::parse and a WhitespaceAnalyzer was
>     // abandoned), so a simple parser splits the search text instead.
>     const QStringList words = QueryParser(searchedText);
>     foreach (QString word, words) {
>         QRegExp findExp;
>         if (word.contains("*")) {
>             word.replace('*', "\\S*"); //match within a word
>             findExp = QRegExp(word);
>             findExp.setMinimal(true);
>         }
>         else {
>             findExp = QRegExp("\\b" + word + "\\b");
>         }
>         findExp.setCaseSensitivity(cs);
>
>         int index = bodyStart; //for every word start at <body>
>         while ((index = findExp.indexIn(ret, index)) != -1) {
>             const int matchLen = findExp.matchedLength();
>             if (matchLen > 0 && !CToolClass::inHTMLTag(index, ret)) {
>                 // Insert the closing tag first so that index still
>                 // points at the start of the match for the opening tag.
>                 ret.insert(index + matchLen, rep2);
>                 ret.insert(index, rep1);
>                 // Skip exactly what was inserted: rep1 AND rep2.
>                 index += rep1.length() + rep2.length();
>             }
>             // Always advance by at least one character so that an
>             // empty match can never stall the loop.
>             index += qMax(matchLen, 1);
>         }
>     }
>     //qWarning("\n\n\n%s", ret.latin1());
>     return ret;
> }
More information about the bt-devel
mailing list