[sword-svn] r1812 - in trunk/src/modules: . filters
scribe at crosswire.org
scribe at crosswire.org
Mon May 16 02:34:51 MST 2005
Author: scribe
Date: 2005-05-16 02:34:50 -0700 (Mon, 16 May 2005)
New Revision: 1812
Modified:
trunk/src/modules/filters/gbfstrongs.cpp
trunk/src/modules/filters/osisstrongs.cpp
trunk/src/modules/filters/osiswordjs.cpp
trunk/src/modules/filters/thmlstrongs.cpp
trunk/src/modules/swmodule.cpp
Log:
Fixed and made consistent all lemma and morph parsing
Modified: trunk/src/modules/filters/gbfstrongs.cpp
===================================================================
--- trunk/src/modules/filters/gbfstrongs.cpp 2005-05-15 08:30:38 UTC (rev 1811)
+++ trunk/src/modules/filters/gbfstrongs.cpp 2005-05-16 09:34:50 UTC (rev 1812)
@@ -58,17 +58,17 @@
}
if (*from == '>') { // process tokens
intoken = false;
-
if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs
if (module->isProcessEntryAttributes()) {
valto = val;
- for (unsigned int i = 2; ((token[i]) && (i < 150)); i++)
+ for (unsigned int i = 1; ((token[i]) && (i < 150)); i++)
*valto++ = token[i];
*valto = 0;
if (atoi((!isdigit(*val))?val+1:val) < 5627) {
// normal strongs number
sprintf(wordstr, "%03d", word++);
- module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val;
+ module->getEntryAttributes()["Word"][wordstr]["Lemma"] = val;
+ module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = "strong";
tmp = "";
tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
@@ -78,8 +78,10 @@
// verb morph
sprintf(wordstr, "%03d", word-1);
module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "OLBMorph";
}
}
+
if (!option) {
if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
if (lastspace)
@@ -89,6 +91,18 @@
continue;
}
}
+ if (module->isProcessEntryAttributes()) {
+ if ((*token == 'W') && (token[1] == 'T')) { // Strongs
+ valto = val;
+ for (unsigned int i = 2; ((token[i]) && (i < 150)); i++)
+ *valto++ = token[i];
+ *valto = 0;
+ sprintf(wordstr, "%03d", word-1);
+ module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "GBFMorph";
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ newText = true;
+ }
+ }
// if not a strongs token, keep token in text
text += '<';
text += token;
Modified: trunk/src/modules/filters/osisstrongs.cpp
===================================================================
--- trunk/src/modules/filters/osisstrongs.cpp 2005-05-15 08:30:38 UTC (rev 1811)
+++ trunk/src/modules/filters/osisstrongs.cpp 2005-05-16 09:34:50 UTC (rev 1812)
@@ -8,7 +8,8 @@
#include <stdlib.h>
#include <osisstrongs.h>
#include <swmodule.h>
-#include <ctype.h>
+#include <versekey.h>
+#include <utilxml.h>
SWORD_NAMESPACE_START
@@ -33,11 +34,12 @@
int tokpos = 0;
bool intoken = false;
bool lastspace = false;
- int word = 1;
+ int wordNum = 1;
char val[128];
char wordstr[5];
char *valto;
char *ch;
+ const char *wordStart = 0;
const SWBuf orig = text;
const char * from = orig.c_str();
@@ -54,53 +56,67 @@
if (*from == '>') { // process tokens
intoken = false;
if ((*token == 'w') && (token[1] == ' ')) { // Word
- *wordstr = 0;
if (module->isProcessEntryAttributes()) {
- valto = val;
- char *num = strstr(token, "lemma=\"x-Strongs:");
- int strongMarkerLength = 17;
- if (!num) { //try alternative strong marker value
- num = strstr(token, "lemma=\"strong:");
- strongMarkerLength = 14;
+ wordStart = from+1;
+ char gh = 0;
+ VerseKey *vkey = 0;
+ if (key) {
+ vkey = SWDYNAMIC_CAST(VerseKey, key);
}
+ XMLTag wtag(token);
+ SWBuf lemma = wtag.getAttribute("lemma");
+ SWBuf morph = wtag.getAttribute("morph");
+ SWBuf src = wtag.getAttribute("src");
+ SWBuf morphClass = "";
+ SWBuf lemmaClass = "";
- if (num) {
- for (num+=strongMarkerLength; ((*num) && (*num != '\"')); num++) {
- *valto++ = *num;
- }
- *valto = 0;
-
- if (atoi((!isdigit(*val))?val+1:val) < 5627) {
- // normal strongs number
- sprintf(wordstr, "%03d", word++);
- module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val;
-
- //now try to find the end tag to get the text between <w> and </w> to set the entry attribute
-
- const char* startTagEnd = strstr(from, ">"); //end of the opening tag
- if (startTagEnd) {
- startTagEnd++;
-
- const char* endTagStart = strstr(startTagEnd, "</w>"); //end of the opening tag
- if (endTagStart && endTagStart > startTagEnd) { //content in between
- SWBuf tmp;
- tmp.append(startTagEnd, endTagStart - startTagEnd);
- module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
- }
+
+ const char *m = strchr(morph.c_str(), ':');
+ if (m) {
+ int len = m-morph.c_str();
+ morphClass.append(morph.c_str(), len);
+ morph << len+1;
+ }
+ m = strchr(lemma.c_str(), ':');
+ if (m) {
+ int len = m-lemma.c_str();
+ lemmaClass.append(lemma.c_str(), len);
+ lemma << len+1;
+ }
+
+ if ((lemmaClass == "x-Strongs") || (lemmaClass == "strong")) {
+ gh = isdigit(lemma[0]) ? 0:lemma[0];
+ if (!gh) {
+ if (vkey) {
+ gh = vkey->Testament() ? 'H' : 'G';
}
}
- else {
- // verb morph
- sprintf(wordstr, "%03d", word-1);
- module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
- }
+ else lemma << 1;
+ lemmaClass = "strong";
}
- }
- if (wordstr) {
+ if ((morphClass == "x-Robinsons") || (morphClass == "x-Robinson") || (morphClass == "Robinson")) {
+ morphClass = "robinson";
+ }
+
+ sprintf(wordstr, "%03d", wordNum);
+ if (gh) lemma.insert(0,gh);
+ if (lemma.length())
+ module->getEntryAttributes()["Word"][wordstr]["Lemma"] = lemma;
+ if (lemmaClass.length())
+ module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = lemmaClass;
+ if (morph.length())
+ module->getEntryAttributes()["Word"][wordstr]["Morph"] = morph;
+ if (morphClass.length())
+ module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = morphClass;
+ if (src.length())
+ module->getEntryAttributes()["Word"][wordstr]["Src"] = src;
strcat(token, " wn=\"");
strcat(token, wordstr);
strcat(token, "\"");
+
+ wordNum++;
}
+
if (!option) {
char *num = strstr(token, "lemma=\"x-Strongs:");
if (num) {
@@ -114,6 +130,17 @@
}
}
}
+ if ((*token == '/') && (token[1] == 'w')) { // Word End
+ if (module->isProcessEntryAttributes()) {
+ if (wordStart) {
+ SWBuf tmp;
+ tmp.append(wordStart, (from-wordStart)-3);
+ sprintf(wordstr, "%03d", wordNum-1);
+ module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
+ }
+ }
+ wordStart = 0;
+ }
// if not a strongs token, keep token in text
text.append('<');
Modified: trunk/src/modules/filters/osiswordjs.cpp
===================================================================
--- trunk/src/modules/filters/osiswordjs.cpp 2005-05-15 08:30:38 UTC (rev 1811)
+++ trunk/src/modules/filters/osiswordjs.cpp 2005-05-16 09:34:50 UTC (rev 1812)
@@ -125,11 +125,8 @@
const char *m = strchr(morph.c_str(), ':');
if (m) m++;
else m = morph.c_str();
- text.appendFormatted("<span onclick=\"p(\'%s\', \'%s\', '%s', '%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m);
+ text.appendFormatted("<span onclick=\"p(\'%s\',\'%s\','%s','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m);
wordNum++;
-
-
-
}
if ((*token == '/') && (token[1] == 'w') && option) { // Word
text += "</w></span>";
Modified: trunk/src/modules/filters/thmlstrongs.cpp
===================================================================
--- trunk/src/modules/filters/thmlstrongs.cpp 2005-05-15 08:30:38 UTC (rev 1811)
+++ trunk/src/modules/filters/thmlstrongs.cpp 2005-05-16 09:34:50 UTC (rev 1812)
@@ -68,8 +68,9 @@
*valto = 0;
if (atoi((!isdigit(*val))?val+1:val) < 5627) {
// normal strongs number
- sprintf(wordstr, "%03d", word++);
- module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val;
+ sprintf(wordstr, "%03d", word);
+ module->getEntryAttributes()["Word"][wordstr]["Lemma"] = val;
+ module->getEntryAttributes()["Word"][wordstr]["LemmaClass"] = "strong";
tmp = "";
tmp.append(text.c_str()+textStart, (int)(textEnd - textStart));
module->getEntryAttributes()["Word"][wordstr]["Text"] = tmp;
@@ -77,9 +78,11 @@
}
else {
// verb morph
- sprintf(wordstr, "%03d", word-1);
+ sprintf(wordstr, "%03d", word);
module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
+ module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = "OLBMorph";
}
+ word++;
}
if (!option) { // if we don't want strongs
@@ -100,6 +103,9 @@
*valto++ = ch[i];
*valto = 0;
sprintf(wordstr, "%03d", word-1);
+ if ((!stricmp(val, "Robinsons")) || (!stricmp(val, "Robinson"))) {
+ strcpy(val, "robinson");
+ }
module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val;
}
if (!strncmp(ch, "value=\"", 7)) {
@@ -111,6 +117,7 @@
module->getEntryAttributes()["Word"][wordstr]["Morph"] = val;
}
}
+ newText = true;
}
}
// if not a strongs token, keep token in text
Modified: trunk/src/modules/swmodule.cpp
===================================================================
--- trunk/src/modules/swmodule.cpp 2005-05-15 08:30:38 UTC (rev 1811)
+++ trunk/src/modules/swmodule.cpp 2005-05-16 09:34:50 UTC (rev 1812)
@@ -373,7 +373,7 @@
* >=0 - regex
* -1 - phrase
* -2 - multiword
- * -3 - entryAttrib (eg. Word//Strongs/G1234/)
+ * -3 - entryAttrib (eg. Word//Lemma/G1234/)
* -4 - clucene
* flags - options flags for search
* justCheckIfSupported - if set, don't search, only tell if this
@@ -976,7 +976,7 @@
words = getEntryAttributes().find("Word");
if (words != getEntryAttributes().end()) {
for (word = words->second.begin();word != words->second.end(); word++) {
- strongVal = word->second.find("Strongs");
+ strongVal = word->second.find("Lemma");
if (strongVal != word->second.end()) {
// cheeze. skip empty article tags that weren't assigned to any text
if (strongVal->second == "G3588") {
@@ -994,7 +994,7 @@
doc->add( Field::UnIndexed(_T("key"), keyText ) );
doc->add( Field::UnStored(_T("content"), content) );
if (strong.length() > 0)
- doc->add( Field::UnStored(_T("strong"), strong) );
+ doc->add( Field::UnStored(_T("lemma"), strong) );
writer->addDocument(*doc);
delete doc;
}
More information about the sword-cvs mailing list