[sword-svn] r96 - trunk/flashtools
scribe at www.crosswire.org
scribe at www.crosswire.org
Sun Sep 2 13:00:06 MST 2007
Author: scribe
Date: 2007-09-02 13:00:05 -0700 (Sun, 02 Sep 2007)
New Revision: 96
Modified:
trunk/flashtools/flash.cpp
Log:
reworked 'with' vector to go with phrase, instead of count
added more comments
Modified: trunk/flashtools/flash.cpp
===================================================================
--- trunk/flashtools/flash.cpp 2007-09-02 18:27:12 UTC (rev 95)
+++ trunk/flashtools/flash.cpp 2007-09-02 20:00:05 UTC (rev 96)
@@ -14,15 +14,43 @@
using namespace sword;
using namespace std;
-class PhraseCount {
+// used to hold a KJV translation phrase for a greek/hebrew word
+// and any greek/hebrew words combined to make this KJV phrase
+// e.g. hO QEOS = QEOS: [+ hO ]: God
+class Phrase {
public:
- PhraseCount()
- : count(0)
+ Phrase()
+ : phrase("")
{}
- int count;
+ SWBuf phrase;
vector<SWBuf> with;
+ inline bool operator ==(const Phrase &other) const { return !compare(other); }
+ inline bool operator !=(const Phrase &other) const { return compare(other); }
+ inline bool operator > (const Phrase &other) const { return compare(other) > 0; }
+ inline bool operator < (const Phrase &other) const { return compare(other) < 0; }
+ inline bool operator <=(const Phrase &other) const { return compare(other) <= 0; }
+ inline bool operator >=(const Phrase &other) const { return compare(other) >= 0; }
+
+ int compare(const Phrase &right) const {
+ int c = phrase.compare(right.phrase);
+ if (c) return c;
+ vector<SWBuf>::const_iterator lit = with.begin();
+ vector<SWBuf>::const_iterator rit = right.with.begin();
+ while (lit != with.end() && rit != right.with.end()) {
+ c = lit->compare(*rit);
+ if (c) return c;
+ lit++; rit++;
+ }
+ if (lit != with.end()) return 1;
+ if (rit != right.with.end()) return -1;
+ return 0;
+ }
};
+// KJV phrases and their occurrence frequency
+typedef map<Phrase, int> KJVPhrases;
+
+// primary result class
class Word {
public:
Word()
@@ -31,37 +59,50 @@
, freq(0)
, def("")
{}
+
+ // lexical form of this word in utf8 greek/hebrew
SWBuf utf8;
+
+ // strongs number for this word (e.g. G3588)
SWBuf strong;
+
+ // frequency of occurrence in the iterated text
int freq;
- // from stongs lex
+
+ // definition pulled from short strongs def
SWBuf def;
- // computed ourselves
- map<SWBuf, PhraseCount> kjvFreq;
+
+ // kjv translation phrases and their frequencies
+ KJVPhrases kjvFreq;
};
+
string itoa(int v) { stringstream str; str << v; return str.str(); }
+
bool compareFreq(const Word &w1, const Word &w2) {
return w1.freq > w2.freq;
}
-bool compareKJVFreq(const map<SWBuf, PhraseCount>::const_iterator &i1, const map<SWBuf, PhraseCount>::const_iterator &i2) {
- return i1->second.count > i2->second.count;
+
+bool compareKJVFreq(const KJVPhrases::const_iterator &i1, const KJVPhrases::const_iterator &i2) {
+ return i1->second > i2->second;
}
-SWBuf prettyKJVFreq(map<SWBuf, PhraseCount> in) {
+
+// sort and pretty up all the KJV phrases for a word into a nice output buffer
+SWBuf prettyKJVFreq(KJVPhrases in) {
SWBuf retVal;
- vector<map<SWBuf, PhraseCount>::const_iterator> sorted;
- for (map<SWBuf, PhraseCount>::const_iterator it = in.begin(); it != in.end(); it++) {
+ vector<KJVPhrases::const_iterator> sorted;
+ for (KJVPhrases::const_iterator it = in.begin(); it != in.end(); it++) {
// combine cap words with lowercase, if exists
- SWBuf k = it->first;
- if (k.size() && toupper(k[0]) == k[0] && k != "God" && k != "Lord") {
- k[0] = tolower(k[0]);
+ Phrase k = it->first;
+ if (k.phrase.size() && toupper(k.phrase[0]) == k.phrase[0] && k.phrase != "God" && k.phrase != "Lord") {
+ k.phrase[0] = tolower(k.phrase[0]);
if (k != it->first) {
- map<SWBuf, PhraseCount>::iterator i = in.find(k);
+ KJVPhrases::iterator i = in.find(k);
if (i != in.end()) {
- i->second.count += it->second.count;
+ i->second += it->second;
// don't include us in the list cuz we added our freq to another
continue;
}
@@ -70,21 +111,24 @@
sorted.push_back(it);
}
sort(sorted.begin(), sorted.end(), compareKJVFreq);
- for (vector<map<SWBuf, PhraseCount>::const_iterator>::const_iterator it = sorted.begin(); it != sorted.end(); it++) {
+ for (vector<KJVPhrases::const_iterator>::const_iterator it = sorted.begin(); it != sorted.end(); it++) {
if (retVal.size()) retVal += "; ";
// prepend 'with other strongs' if present
- if ((*it)->second.with.size()) {
+ if ((*it)->first.with.size()) {
retVal += "[+";
- for (int i = 0; i < (*it)->second.with.size(); i++) {
- retVal.appendFormatted(" %s", (*it)->second.with[i].c_str());
+ for (int i = 0; i < (*it)->first.with.size(); i++) {
+ retVal.appendFormatted(" %s", (*it)->first.with[i].c_str());
}
retVal += " ] ";
}
- retVal.appendFormatted("%s (%d)", (*it)->first.c_str(), (*it)->second.count);
+ retVal.appendFormatted("%s (%d)", (*it)->first.phrase.c_str(), (*it)->second);
}
return retVal;
}
+
+// take utf8 text and spit out equiv. text substituting escaped codes for multibyte chars
+// java .properties files want this format (flashcard .flash lessons use this format)
SWBuf escapedUTF8(SWBuf inText) {
static UTF8UTF16 convert;
convert.processText(inText);
@@ -105,6 +149,7 @@
}
+// output a simple CSV ('|' separated really) format for importing into OOo or excel
void outputCSV(vector<Word> &wordList) {
for (vector<Word>::iterator it = wordList.begin(); it != wordList.end(); it++) {
Word &w = (*it);
@@ -184,12 +229,14 @@
}
}
+
/**
* do the work
*
* range - the range of verses to process (e.g. "gen-mal")
* addAll - if we should add all words in our lexicon for the testaments
* included in the range even if they don't exist in the text
+ * (useful for generating complete OT or NT strongs word lists)
*
*/
vector<Word> processWords(const char *range, bool addAll = true) {
@@ -238,12 +285,15 @@
while (text.size() && (strchr(".;,?-!\"()[]{}':/\t\r\n ", text[text.size()-1]))) text.setSize(text.size()-1);
if (!text.size()) text = "[Untranslated]";
}
- wordList[strong].kjvFreq[text].count++;
+ Phrase p;
+ p.phrase = text;
if (parts > 1) {
+ // let's build our 'with' list excluding ourselves
list<SWBuf> withoutMe = lemmas;
withoutMe.remove(strong);
- wordList[strong].kjvFreq[text].with = vector<SWBuf>(withoutMe.begin(), withoutMe.end());
+ p.with = vector<SWBuf>(withoutMe.begin(), withoutMe.end());
}
+ wordList[strong].kjvFreq[p]++;
wordList[strong].freq++;
}
}
More information about the sword-cvs
mailing list