[sword-svn] r148 - trunk/textsstats
scribe at www.crosswire.org
scribe at www.crosswire.org
Fri Dec 12 00:58:29 MST 2008
Author: scribe
Date: 2008-12-12 00:58:28 -0700 (Fri, 12 Dec 2008)
New Revision: 148
Modified:
trunk/textsstats/stats.cpp
Log:
Modified: trunk/textsstats/stats.cpp
===================================================================
--- trunk/textsstats/stats.cpp 2008-11-13 05:30:37 UTC (rev 147)
+++ trunk/textsstats/stats.cpp 2008-12-12 07:58:28 UTC (rev 148)
@@ -112,6 +112,12 @@
// Ordering predicate on Word::freq: true when w1 has the strictly higher freq,
// so sort(..., compareFreq) places the most frequent entries first.
bool compareFreq(const Word &w1, const Word &w2) {
return w1.freq > w2.freq;
}
+bool compareSeqLenFreq(const Word &w1, const Word &w2) { // Ordering predicate: larger utf16.size() first; equal sizes are ordered by higher freq first.
+ if (w1.utf16.size() != w2.utf16.size()) { // sizes differ: size alone decides the order
+ return (w1.utf16.size() > w2.utf16.size());
+ }
+ return w1.freq > w2.freq; // same size: fall back to descending freq, as compareFreq does
+}
bool compareKJVFreq(const KJVPhrases::const_iterator &i1, const KJVPhrases::const_iterator &i2) {
@@ -196,12 +202,31 @@
for (vector<Word>::const_iterator it = seqList.begin(); it != seqList.end(); it++) {
const Word &w = (*it);
// cout << w->freq << "|" << escapedUTF8(w->utf8).c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n";
- cout << w.freq << "|" << toUTF8(w.utf16).c_str() << "|" << w.utf16.size() << "\n";
+ cout << w.freq << "," << toUTF8(w.utf16).c_str() << "," << w.utf16.size() << "\n";
}
std::cout << std::endl;
}
+void outputHTML(const vector<Word> &seqList) { // Writes one "<tr><td>freq</td><td>text</td></tr>" line per entry of seqList to cout; only rows are emitted here, no enclosing table markup.
+ for (vector<Word>::const_iterator it = seqList.begin(); it != seqList.end(); it++) {
+ const Word &w = (*it);
+// cout << w->freq << "|" << escapedUTF8(w->utf8).c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n";
+ cout << "<tr><td>" << w.freq << "</td><td>" << toUTF8(w.utf16).c_str() << "</td></tr>\n"; // NOTE(review): the toUTF8() result is inserted as-is; no HTML escaping is visible here -- confirm '<' and '&' cannot occur in the text
+ }
+ std::cout << std::endl; // newline after the last row; std::endl also flushes the stream
+}
+void outputXML(const vector<Word> &seqList) { // Writes one <Row> of three <Cell><Data> elements per entry of seqList to cout; only rows are emitted here, no enclosing document markup. Presumably SpreadsheetML rows (ss:Type attributes) -- TODO confirm.
+ for (vector<Word>::const_iterator it = seqList.begin(); it != seqList.end(); it++) {
+ const Word &w = (*it);
+// cout << w->freq << "|" << escapedUTF8(w->utf8).c_str() << "|" << w->strong << "|" << prettyKJVFreq(w->kjvFreq).c_str() << "\n";
+ cout << "<Row><Cell><Data ss:Type=\"Number\">" << w.freq << "</Data></Cell>"; // cell 1: freq, typed as Number
+ cout << "<Cell><Data ss:Type=\"String\">" << toUTF8(w.utf16).c_str() << "</Data></Cell>"; // cell 2: the text from toUTF8(); NOTE(review): no XML escaping is visible here -- confirm '<' and '&' cannot occur
+ cout << "<Cell><Data ss:Type=\"Number\">" << w.utf16.size() << "</Data></Cell></Row>\n"; // cell 3: utf16.size(), typed as Number; closes the row
+ }
+ std::cout << std::endl; // newline after the last row; std::endl also flushes the stream
+}
+
/**
* output our flashcard .flash file format
*
@@ -339,10 +364,14 @@
int minLength = 1;
int maxLength = 3;
char *range = "mat-rev";
+ int order = 1;
+ int format = 1;
if (argc > 1) minLength = atoi(argv[1]);
if (argc > 2) maxLength = atoi(argv[2]);
if (argc > 3) range = argv[3];
+ if (argc > 4) order = atoi(argv[4]);
+ if (argc > 5) format = atoi(argv[5]);
vector<Word> results;
for (int i = minLength; i <= maxLength; i++) {
@@ -350,8 +379,21 @@
results.insert(results.end(), pass.begin(), pass.end());
}
- sort(results.begin(), results.end(), compareFreq);
- outputCSV(results);
+ if (order == 1) {
+ sort(results.begin(), results.end(), compareFreq);
+ }
+ else {
+ sort(results.begin(), results.end(), compareSeqLenFreq);
+ }
+ if (format == 1) {
+ outputCSV(results);
+ }
+ else if (format == 2) {
+ outputHTML(results);
+ }
+ else {
+ outputXML(results);
+ }
return 0;
}
More information about the sword-cvs
mailing list