[sword-svn] r95 - trunk/flashtools
scribe at www.crosswire.org
scribe at www.crosswire.org
Sun Sep 2 11:27:13 MST 2007
Author: scribe
Date: 2007-09-02 11:27:12 -0700 (Sun, 02 Sep 2007)
New Revision: 95
Started rework of flash tools to work with
combined lemma of the new KJV module rev.
Modified: trunk/flashtools/Makefile
--- trunk/flashtools/Makefile 2007-08-31 07:41:49 UTC (rev 94)
+++ trunk/flashtools/Makefile 2007-09-02 18:27:12 UTC (rev 95)
@@ -1,7 +1,9 @@
TARGETS= flash
all: $(TARGETS)
+ mkdir -p hebFreq hebFreqKJV greekFreq greekFreqKJV
+ rm -rf hebFreq hebFreqKJV greekFreq greekFreqKJV
Modified: trunk/flashtools/flash.cpp
--- trunk/flashtools/flash.cpp 2007-08-31 07:41:49 UTC (rev 94)
+++ trunk/flashtools/flash.cpp 2007-09-02 18:27:12 UTC (rev 95)
@@ -14,46 +14,55 @@
using namespace sword;
using namespace std;
+class PhraseCount {
+ PhraseCount()
+ : count(0)
+ {}
+ int count;
+ vector<SWBuf> with;
class Word {
: utf8("")
- , strong(0)
+ , strong("")
, freq(0)
- , kjvTrans("")
+ , def("")
SWBuf utf8;
- int strong;
+ SWBuf strong;
int freq;
// from strongs lex
- SWBuf kjvTrans;
+ SWBuf def;
// computed ourselves
- map<SWBuf, int> kjvFreq;
+ map<SWBuf, PhraseCount> kjvFreq;
string itoa(int v) { stringstream str; str << v; return str.str(); }
-bool compareFreq(const Word *w1, const Word *w2) {
- return w1->freq > w2->freq;
+bool compareFreq(const Word &w1, const Word &w2) {
+ return w1.freq > w2.freq;
-bool compareKJVFreq(const map<SWBuf, int>::const_iterator &i1, const map<SWBuf, int>::const_iterator &i2) {
- return i1->second > i2->second;
+bool compareKJVFreq(const map<SWBuf, PhraseCount>::const_iterator &i1, const map<SWBuf, PhraseCount>::const_iterator &i2) {
+ return i1->second.count > i2->second.count;
-SWBuf prettyKJVFreq(map<SWBuf, int> &in) {
+SWBuf prettyKJVFreq(map<SWBuf, PhraseCount> in) {
SWBuf retVal;
- vector<map<SWBuf, int>::const_iterator> sorted;
- for (map<SWBuf, int>::const_iterator it = in.begin(); it != in.end(); it++) {
+ vector<map<SWBuf, PhraseCount>::const_iterator> sorted;
+ for (map<SWBuf, PhraseCount>::const_iterator it = in.begin(); it != in.end(); it++) {
// combine cap words with lowercase, if exists
- if (toupper(it->first[0]) == it->first[0] && it->first != "God" && it->first != "Lord") {
- SWBuf key = it->first;
- key[0] = tolower(key[0]);
- if (key != it->first) {
- map<SWBuf, int>::iterator i = in.find(key);
+ SWBuf k = it->first;
+ if (k.size() && toupper(k[0]) == k[0] && k != "God" && k != "Lord") {
+ k[0] = tolower(k[0]);
+ if (k != it->first) {
+ map<SWBuf, PhraseCount>::iterator i = in.find(k);
if (i != in.end()) {
- i->second += it->second;
- // don't include us in the list cuz we added out freq to another
+ i->second.count += it->second.count;
+ // don't include us in the list cuz we added our freq to another
@@ -61,9 +70,17 @@
sort(sorted.begin(), sorted.end(), compareKJVFreq);
- for (vector<map<SWBuf, int>::const_iterator>::const_iterator it = sorted.begin(); it != sorted.end(); it++) {
+ for (vector<map<SWBuf, PhraseCount>::const_iterator>::const_iterator it = sorted.begin(); it != sorted.end(); it++) {
if (retVal.size()) retVal += "; ";
- retVal.appendFormatted("%s (%d)", (*it)->first.c_str(), (*it)->second);
+ // prepend 'with other strongs' if present
+ if ((*it)->second.with.size()) {
+ retVal += "[+";
+ for (int i = 0; i < (*it)->second.with.size(); i++) {
+ retVal.appendFormatted(" %s", (*it)->second.with[i].c_str());
+ }
+ retVal += " ] ";
+ }
+ retVal.appendFormatted("%s (%d)", (*it)->first.c_str(), (*it)->second.count);
return retVal;
@@ -88,49 +105,67 @@
-void outputCSV(vector<Word *> &wordList) {
- for (vector<Word *>::iterator it = wordList.begin(); it != wordList.end(); it++) {
- Word *w = (*it);
+void outputCSV(vector<Word> &wordList) {
+ for (vector<Word>::iterator it = wordList.begin(); it != wordList.end(); it++) {
+ Word &w = (*it);
// cout << w->freq << "|" << escapedUTF8(w->utf8).c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n";
- cout << w->freq << "|" << w->utf8.c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "|" << w->kjvTrans << "\n";
+ cout << w.freq << "|" << w.utf8.c_str() << "|" << w.strong << "|" << prettyKJVFreq(w.kjvFreq).c_str() << "|" << w.def << "\n";
std::cout << std::endl;
-void outputFlash(vector<Word *> &wordList, int maxPerLesson) {
+ * output our flashcard .flash file format
+ *
+ * wordList - the words to write out as flashcards, in the order given
+ * outputDir - directory path where to write files, e.g. "./hebFreq"
+ * kjvFreq - if true, process KJV translation frequencies and use these as
+ * the word answers; otherwise, use short strongs defs.
+ * maxPerLesson - maximum number of words per lesson
+ *
+ */
+void outputFlash(const vector<Word> &wordList, const char *outputDir = ".", bool kjvFreq = true, int maxPerLesson = 25) {
ThMLPlain strip;
ofstream ofile;
- int wordCount = 0;
+ int wordCount = 0;
int lessonNumber = 0;
- int startFreq = 0;
- int lastFreq = 0;
+ int startFreq = 0;
+ int lastFreq = 0;
- vector<Word *>::iterator it = wordList.begin();
+ vector<Word>::const_iterator it = wordList.begin();
while (it != wordList.end()) {
- Word *w = (*it);
+ const Word &w = (*it);
if (!wordCount) {
- SWBuf fname = "lesson";
+ SWBuf fname = outputDir;
+ fname += "/lesson";
fname.appendFormatted("%d", lessonNumber);
fname += ".flash";
- startFreq = w->freq;
+ startFreq = w.freq;
- // use if you want answers as KJV phrases
- SWBuf answers = prettyKJVFreq(w->kjvFreq);
- if (answers.size() > 200) answers.size(200);
+ SWBuf word = w.utf8;
+ word.trim();
+ SWBuf answers = "";
+ answers.trim();
+ // if we want answers as KJV phrases
+ if (kjvFreq) {
+ answers = prettyKJVFreq(w.kjvFreq);
+ if (answers.size() > 200) answers.size(200);
+ }
+ // if we would rather have short strongs
+ else {
+ SWBuf answers = w.def;
+ strip.processText(answers); // remove html tags
+ answers.replaceBytes("\n\r", ' '); // remove newlines
+ }
- // use if you would rather have short strongs
-// SWBuf answers = w->kjvTrans;
-// strip.processText(answers); // remove html tags
-// answers.replaceBytes("\n\r", ' '); // remove newlines
// be sure we have both a word and an answer
- if (w->utf8.trim().size() && answers.trim().size()) {
- ofile << "word" << wordCount << "=" << escapedUTF8(w->utf8) << "\n";
+ if (word.size() && answers.size()) {
+ ofile << "word" << wordCount << "=" << escapedUTF8(word) << "\n";
ofile << "answers" << wordCount << "=" << answers << "\n";
- lastFreq = w->freq;
+ lastFreq = w.freq;
@@ -149,53 +184,115 @@
-int main(int argc, char **argv)
+ * do the work
+ *
+ * range - the range of verses to process (e.g. "gen-mal")
+ * addAll - if we should add all words in our lexicon for the testaments
+ * included in the range even if they don't exist in the text
+ *
+ */
+vector<Word> processWords(const char *range, bool addAll = true) {
SWMgr manager;
- SWModule *bible = manager.getModule("KJV");
- map<int, Word> wordList;
+ SWModule &bible = *manager.getModule("KJV");
+ map<SWBuf, Word> wordList;
- SWConfig utf8("hwords.conf");
- SWConfig defs("hdefs.conf");
-// SWConfig utf8("gwords.conf");
-// SWConfig defs("gdefs.conf");
+ SWConfig hutf8("hwords.conf");
+ SWConfig hdefs("hdefs.conf");
+ SWConfig gutf8("gwords.conf");
+ SWConfig gdefs("gdefs.conf");
- for (bible->setKey("gen.1.1"); ((VerseKey*)bible->getKey())->Testament() == 1; (*bible)++) {
-// for (bible->setKey("mat.1.1"); !bible->Error(); (*bible)++) {
- bible->RenderText(); // force an entry lookup to resolve key to something in the index
+ VerseKey parser;
+ ListKey r = parser.ParseVerseList(range, 0, true);
+ r.Persist(true);
+ bible.setKey(r);
+ for (bible = TOP; !bible.Error(); bible++) {
+ bible.RenderText(); // force an entry lookup to resolve key to something in the index
- AttributeList &words = bible->getEntryAttributes()["Word"];
+ AttributeList &words = bible.getEntryAttributes()["Word"];
for (AttributeList::iterator word = words.begin(); word != words.end(); word++) {
- SWBuf strong = word->second["Lemma"];
- SWBuf text = word->second["Text"];
- text.trim();
- // trim punctuation from end
- while (text.size() && (strchr(".;,?-!\"()[]{}':/\t\r\n ", text[text.size()-1]))) text.setSize(text.size()-1);
- if (!text.size()) text = "[Untranslated]";
- strong << 1;
- wordList[atoi(strong.c_str())].freq++;
- wordList[atoi(strong.c_str())].kjvFreq[text]++;
-// cout << strong << "\n";
+ SWBuf partCount = word->second["PartCount"];
+ int parts = atoi(partCount.c_str());
+ if (parts < 1) parts = 1;
+ // build a list of all lemmas for use later in 'with'
+ // i.e. 'translated xxx with Gnnnn1, Gnnnn2'
+ list<SWBuf> lemmas;
+ for (int i = 1; i <= parts; i++) {
+ SWBuf lemKey = "Lemma";
+ if (parts > 1) lemKey.appendFormatted(".%d", i);
+ lemmas.push_back(word->second[lemKey]);
+ }
+ for (int i = 1; i <= parts; i++) {
+ SWBuf lemKey = "Lemma";
+ if (parts > 1) lemKey.appendFormatted(".%d", i);
+ SWBuf strong = word->second[lemKey];
+ SWBuf text = word->second["Text"];
+ if ((parts > 2) && (strong == "G3588")) {
+ text = "[article]";
+ }
+ else {
+ text.trim();
+ // trim punctuation from end
+ while (text.size() && (strchr(".;,?-!\"()[]{}':/\t\r\n ", text[text.size()-1]))) text.setSize(text.size()-1);
+ if (!text.size()) text = "[Untranslated]";
+ }
+ wordList[strong].kjvFreq[text].count++;
+ if (parts > 1) {
+ list<SWBuf> withoutMe = lemmas;
+ withoutMe.remove(strong);
+ wordList[strong].kjvFreq[text].with = vector<SWBuf>(withoutMe.begin(), withoutMe.end());
+ }
+ wordList[strong].freq++;
+ }
- // first use utf8 list to iterate and add utf8 entries.\
- // this assures we have an entry for every word, even it it is not
- // present in the module
- for (ConfigEntMap::iterator it = utf8["words"].begin(); it != utf8["words"].end(); it++) {
- wordList[atoi(it->first)].utf8 = it->second;
+ if (addAll) {
+ // first use utf8 list to iterate and add utf8 entries.
+ // this assures we have an entry for every word, even if it is not
+ // present in the module
+ r = TOP;
+ if (VerseKey(r).Testament() == 1) {
+ for (ConfigEntMap::iterator it = hutf8["words"].begin(); it != hutf8["words"].end(); it++) {
+ wordList[(SWBuf)"H"+it->first].utf8 = it->second;
+ }
+ }
+ r = BOTTOM;
+ if (VerseKey(r).Testament() == 2) {
+ for (ConfigEntMap::iterator it = gutf8["words"].begin(); it != gutf8["words"].end(); it++) {
+ wordList[(SWBuf)"G"+it->first].utf8 = it->second;
+ }
+ }
- vector<Word *> sorted;
- for (map<int, Word>::iterator it = wordList.begin(); it != wordList.end(); it++) {
- it->second.strong = it->first;
- it->second.kjvTrans = defs["defs"][itoa(it->first).c_str()];
- sorted.push_back(&it->second);
+ vector<Word> sorted;
+ for (map<SWBuf, Word>::iterator it = wordList.begin(); it != wordList.end(); it++) {
+ // pull strongs key from map and populate Word
+ SWBuf s = it->first;
+ it->second.strong = s;
+ // populate lex defs
+ it->second.def = (s[0] == 'G') ?
+ gdefs["defs"][(s << 1).c_str()] :
+ hdefs["defs"][(s << 1).c_str()];
+ // put only word in sorted container
+ sorted.push_back(it->second);
sort(sorted.begin(), sorted.end(), compareFreq);
-// outputCSV(sorted);
- outputFlash(sorted, 25);
+ return sorted;
+int main(int argc, char **argv)
+ outputFlash(processWords("gen-mal"), "hebFreqKJV" , true);
+ outputFlash(processWords("gen-mal"), "hebFreq" , false);
+ outputFlash(processWords("mat-rev"), "greekFreqKJV", true);
+ outputFlash(processWords("mat-rev"), "greekFreq" , false);
+// outputCSV(processWords("mat-rev"));
return 0;
More information about the sword-cvs
mailing list