[sword-svn] r556 - trunk/migratetags
scribe at crosswire.org
scribe at crosswire.org
Sun Apr 16 11:15:24 EDT 2023
Author: scribe
Date: 2023-04-16 11:15:24 -0400 (Sun, 16 Apr 2023)
New Revision: 556
Modified:
trunk/migratetags/migratetags.cpp
Log:
Added an option (-l) to allow the addition of lexical information.
Modified: trunk/migratetags/migratetags.cpp
===================================================================
--- trunk/migratetags/migratetags.cpp 2023-04-16 13:48:43 UTC (rev 555)
+++ trunk/migratetags/migratetags.cpp 2023-04-16 15:15:24 UTC (rev 556)
@@ -36,12 +36,13 @@
SWBuf findCanonicalBibleText(SWBuf orig, BibMap &bibMap, BibMap &tTags);
SWBuf buildWordMaps(const SWBuf &markupBuf, const BibMap &bibMap, vector<SWBuf> &targetWords, vector<int> &targetWordStarts, vector<int> &targetWordEnds);
void pullFromModData(SWModule &fromMod, vector<XMLTag>&wordTags, vector<SWBuf> &fromWords, vector<int> &fromWordTags);
-void insertWordTags(VerseKey *vk, SWBuf &markupBuf, BibMap &bibMap, BibMap &wTags, vector<int> &targetWordTags, const vector<XMLTag> &wordTags, const vector<int> &targetWordStarts, const vector<int> &targetWordEnds);
+void insertWordTags(VerseKey *vk, SWBuf &markupBuf, BibMap &bibMap, BibMap &wTags, vector<int> &targetWordTags, const vector<XMLTag> &wordTags, const vector<int> &targetWordStarts, const vector<int> &targetWordEnds, SWConfig *lex = 0);
// app options
bool optionFilterAccents = false;
bool optionFilterAppCrit = false;
bool optionDebug = false;
+bool optionIncludeLex = false;
vector<SWBuf> optionExceptionFile;
SWConfig *exceptionFile = 0;
@@ -50,6 +51,7 @@
fprintf(stderr, "\n=== migratetags (Revision $Rev$) Migrate word morphology from one module to another.\n");
fprintf(stderr, "\nusage: %s [options]\n", progName);
fprintf(stderr, " -ss <moduleName>\t provide the Strong's source module name\n");
+ fprintf(stderr, " -l \t\t include lexical and source information\n");
fprintf(stderr, " -t <moduleName>\t provide the target module name\n");
fprintf(stderr, " -tei <filename>\t provide the target tei filename\n");
fprintf(stderr, " -e <exception file>\t provide an ini-style .conf file with overriding tag exceptions.\n");
@@ -188,6 +190,9 @@
else if (!strcmp(argv[i], "-fa")) {
optionFilterAccents = true;
}
+ else if (!strcmp(argv[i], "-l")) {
+ optionIncludeLex = true;
+ }
else if (!strcmp(argv[i], "-fc")) {
optionFilterAppCrit = true;
}
@@ -256,6 +261,10 @@
VerseKey *targetModKey = (VerseKey *)(targetInput ? fromMod.createKey() : targetMod->createKey());
targetModKey->setIntros(true);
SWBuf targetModText;
+ SWConfig *lex = 0;
+ if (optionIncludeLex) {
+ lex = new SWConfig("../flashtools/greek.conf");
+ }
while ((targetInput ? getNextVerseTEI(targetModKey, &targetModText) : getNextVerse(targetModKey, &targetModText))) {
if (targetModKey->getError()) {
cout << targetModText;
@@ -408,9 +417,10 @@
//
matcher->matchWords(targetWordTags, targetWords, fromWords, fromWordTags);
+
// ok, now that we have our targetWordTags magically populated
// let's do the grunt work of inserting the <w> and </w> tags
- insertWordTags((VerseKey *)targetModKey, newTargetModMarkup, bibMap, wTags, targetWordTags, wordTags, targetWordStarts, targetWordEnds);
+ insertWordTags((VerseKey *)targetModKey, newTargetModMarkup, bibMap, wTags, targetWordTags, wordTags, targetWordStarts, targetWordEnds, lex);
if (optionDebug) {
@@ -477,6 +487,7 @@
}
delete exceptionFile;
+ delete lex;
return 0;
}
@@ -686,7 +697,7 @@
}
-void insertWordTags(VerseKey *vk, SWBuf &markupBuf, BibMap &bibMap, BibMap &wTags, vector<int> &targetWordTags, const vector<XMLTag> &wordTags, const vector<int> &targetWordStarts, const vector<int> &targetWordEnds) {
+void insertWordTags(VerseKey *vk, SWBuf &markupBuf, BibMap &bibMap, BibMap &wTags, vector<int> &targetWordTags, const vector<XMLTag> &wordTags, const vector<int> &targetWordStarts, const vector<int> &targetWordEnds, SWConfig *lex) {
// TODO: this method needs some work,
// like putting multiple consecutive words
// together in one tag
@@ -711,6 +722,23 @@
}
}
if (wordTag.length()) {
+ // if we have been asked to include extra lexical data
+ if (lex) {
+ XMLTag w(wordTag);
+ int attCount = w.getAttributePartCount("lemma", ' ');
+ for (int i = 0; i < attCount; ++i) {
+ SWBuf a = w.getAttribute("lemma", i, ' ');
+ SWBuf c = a.stripPrefix(':');
+ if (c == "strong") {
+ if (a.startsWith("G") || a.startsWith("H")) a << 1;
+ SWBuf dict = (*lex)[a]["UTF8"];
+ SWBuf gloss = (*lex)[a]["Meaning"];
+ w.setAttribute("corresp", dict);
+ w.setAttribute("gloss", gloss);
+ wordTag = w.toString();
+ }
+ }
+ }
insert((const char *)wordTag, markupBuf, targetWordStarts[i], bibMap, wTags);
insert("</w>", markupBuf, targetWordEnds[i], bibMap, wTags, true);
}
More information about the sword-cvs mailing list