[sword-svn] r2419 - trunk/utilities
dmsmith at crosswire.org
dmsmith at crosswire.org
Sat May 23 05:59:46 MST 2009
Author: dmsmith
Date: 2009-05-23 05:59:46 -0700 (Sat, 23 May 2009)
New Revision: 2419
Modified:
trunk/utilities/osis2mod.cpp
Log:
Fixed a pre-verse problem where the closing of a section was seen as closing a chapter.
Fixed a potential bug where a char* returned from a subroutine was being held and used directly instead of being copied.
Changed debug to always be available.
Fixed silly whitespace problems where spaces were used to indent.
Modified: trunk/utilities/osis2mod.cpp
===================================================================
--- trunk/utilities/osis2mod.cpp 2009-05-20 06:50:37 UTC (rev 2418)
+++ trunk/utilities/osis2mod.cpp 2009-05-23 12:59:46 UTC (rev 2419)
@@ -50,9 +50,7 @@
using namespace std;
-// Turn debugging on and off
-//#define DEBUG
-int debug = 0;
+int debug = 0; // mask of debug flags
const int DEBUG_WRITE = 1; // writing to module
const int DEBUG_VERSE = 2; // verse start and end
const int DEBUG_QUOTE = 4; // quotes, especially Words of Christ (WOC)
@@ -232,11 +230,10 @@
bool inRange = false;
while (*p) {
if (inRange) {
-#ifdef DEBUG
- if (debug & DEBUG_REF) {
- cout << "DEBUG(REF): Copy range marker:" << *p << endl;;
- }
-#endif
+ if (debug & DEBUG_REF) {
+ cout << "DEBUG(REF): Copy range marker:" << *p << endl;;
+ }
+
// Range markers are copied as is
*s++ = *p++;
}
@@ -252,7 +249,7 @@
if (*n == ':') {
// set p to skip the work prefix
p = n + 1;
-#ifdef DEBUG
+
if (debug & DEBUG_REF) {
cout << "DEBUG(REF): Found a work prefix ";
for (char *x = s; x <= n; x++) {
@@ -260,29 +257,25 @@
}
cout << endl;
}
-#endif
}
// Now we are in the meat of an osisID.
// Copy it to its end but stop on a grain marker of '!'
-#ifdef DEBUG
if (debug & DEBUG_REF) {
cout << "DEBUG(REF): Copy osisID:";
}
-#endif
+
while (*p && *p != '!' && *p != ' ' && *p != '-') {
-#ifdef DEBUG
if (debug & DEBUG_REF) {
cout << *p;
}
-#endif
+
*s++ = *p++;
}
-#ifdef DEBUG
+
if (debug & DEBUG_REF) {
cout << endl;
}
-#endif
// The ! and everything following until we hit
// the end of the osisID is part of the grain reference
@@ -291,7 +284,7 @@
while (*n && *n != ' ' && *n != '-') {
n++;
}
-#ifdef DEBUG
+
if (debug & DEBUG_REF) {
cout << "DEBUG(REF): Found a grain suffix ";
for (char *x = p; x < n; x++) {
@@ -299,7 +292,7 @@
}
cout << endl;
}
-#endif
+
p = n;
}
@@ -309,13 +302,11 @@
// then we are entering a range
inRange = !inRange && *p == '-';
-#ifdef DEBUG
if (debug & DEBUG_REF) {
if (inRange) {
cout << "DEBUG(REF): Found a range" << endl;
}
}
-#endif
// between ranges and stand alone osisIDs we might have whitespace
if (!inRange && *p == ' ') {
@@ -323,13 +314,13 @@
while (*p == ' ') {
p++;
}
+
// replacing them all with a ';'
*s++ = ';';
-#ifdef DEBUG
+
if (debug & DEBUG_REF) {
cout << "DEBUG(REF): replacing space with ;. Remaining: " << p << endl;
}
-#endif
}
}
@@ -340,11 +331,10 @@
*s = '\0';
// Since we modified the swbuf, we need to tell it what we have done
buf.setSize(s - buf.c_str());
-#ifdef DEBUG
+
if (debug & DEBUG_REF) {
cout << "DEBUG(REF): shortended keyVal to`" << buf.c_str() << "`"<< endl;
}
-#endif
}
}
@@ -383,11 +373,9 @@
// If we have gotten here the reference is not in the selected versification.
cout << "INFO(V11N): " << before << " is not in the " << currentVerse.getVersificationSystem() << " versification." << endl;
-#ifdef DEBUG
if (debug & DEBUG_REV11N) {
cout << "DEBUG(V11N): " << before << " normalizes to " << after << endl;
}
-#endif
return false;
}
@@ -421,11 +409,9 @@
int chapterMax = key.getChapterMax();
int verseMax = key.getVerseMax();
-#ifdef DEBUG
if (debug & DEBUG_REV11N) {
cout << "DEBUG(V11N) Chapter max:" << chapterMax << ", Verse Max:" << verseMax << endl;
}
-#endif
cout << "INFO(V11N): " << key.getOSISRef() << " is not in the " << key.getVersificationSystem() << " versification.";
// Since isValidRef returned false constrain the key to the nearest prior reference.
@@ -535,11 +521,9 @@
activeVerseText = currentText + " " + activeVerseText;
}
-#ifdef DEBUG
if (debug & DEBUG_WRITE) {
cout << "DEBUG(WRITE): " << activeOsisID << ":" << currentVerse.getOSISRef() << ": " << activeVerseText << endl;
}
-#endif
module->setEntry(activeVerseText);
activeVerseText = "";
@@ -612,6 +596,12 @@
// Flag used to indicate where useful text begins
static bool firstDiv = false;
+ // Retain the sID of book, chapter and verse (commentary) divs so that we can find them again.
+ // This relies on transformBSP.
+ static SWBuf sidBook = "";
+ static SWBuf sidChapter = "";
+ static SWBuf sidVerse = "";
+
// Stack of quote elements used to handle Words of Christ
static std::stack<XMLTag> quoteStack;
@@ -628,9 +618,10 @@
static int verseDepth = 0;
int tagDepth = tagStack.size();
- const char *tokenName = token.getName();
+ SWBuf tokenName = token.getName();
bool isEndTag = token.isEndTag() || token.getAttribute("eID");
- const char *typeAttr = token.getAttribute("type");
+ SWBuf typeAttr = token.getAttribute("type");
+ SWBuf eidAttr = token.getAttribute("eID");
// process start tags
if (!isEndTag) {
@@ -638,21 +629,19 @@
// Remember non-empty start tags
if (!token.isEmpty()) {
tagStack.push(token);
-#ifdef DEBUG
+
if (debug & DEBUG_STACK) {
cout << "DEBUG(STACK): " << currentOsisID << ": push (" << tagStack.size() << ") " << token.getName() << endl;
}
-#endif
}
// throw away everything up to the first div
if (!firstDiv) {
if (!strcmp(tokenName, "div")) {
-#ifdef DEBUG
if (debug & DEBUG_OTHER) {
cout << "DEBUG(FOUND): Found first div and pitching prior material: " << text << endl;
}
-#endif
+
// TODO: Save off the content to use it to suggest the module's conf.
firstDiv = true;
text = "";
@@ -670,13 +659,13 @@
// BOOK START, <div type="book" ...>
if ((!strcmp(tokenName, "div")) && (typeAttr && !strcmp(typeAttr, "book"))) {
if (inBookHeader || inChapterHeader) { // this one should never happen, but just in case
-#ifdef DEBUG
+
if (debug & DEBUG_TITLE) {
cout << "DEBUG(TITLE): " << currentOsisID << ": OOPS HEADING " << endl;
cout << "\tinChapterHeader = " << inChapterHeader << endl;
cout << "\tinBookHeader = " << inBookHeader << endl;
}
-#endif
+
currentVerse.Testament(0);
currentVerse.Book(0);
currentVerse.Chapter(0);
@@ -688,6 +677,7 @@
currentVerse.Verse(0);
strcpy(currentOsisID, currentVerse.getOSISRef());
+ sidBook = token.getAttribute("sID");
inChapter = false;
inVerse = false;
inPreVerse = false;
@@ -702,11 +692,9 @@
if (!inCanonicalOSISBook) {
cout << "WARNING(V11N): New book is " << token.getAttribute("osisID") << " and is not in " << v11n << " versification, ignoring" << endl;
}
-#ifdef DEBUG
else if (debug & DEBUG_OTHER) {
cout << "DEBUG(FOUND): New book is " << currentVerse.getOSISRef() << endl;
}
-#endif
return false;
}
@@ -716,23 +704,23 @@
(!strcmp(tokenName, "chapter"))
) {
if (inBookHeader) {
-#ifdef DEBUG
if (debug & DEBUG_TITLE) {
cout << "DEBUG(TITLE): " << currentOsisID << ": BOOK HEADING "<< text.c_str() << endl;
}
-#endif
+
writeEntry(text);
}
currentVerse = token.getAttribute("osisID");
currentVerse.Verse(0);
-#ifdef DEBUG
+
if (debug & DEBUG_OTHER) {
cout << "DEBUG(FOUND): Current chapter is " << currentVerse.getOSISRef() << " (" << token.getAttribute("osisID") << ")" << endl;
}
-#endif
+
strcpy(currentOsisID, currentVerse.getOSISRef());
+ sidChapter = token.getAttribute("sID");
inChapter = true;
inVerse = false;
inPreVerse = false;
@@ -748,21 +736,19 @@
// VERSE, <verse ...> OR COMMENTARY START, <div annotateType="xxx" ...>
if (!strcmp(tokenName, "verse") ||
(!strcmp(tokenName, "div") && token.getAttribute("annotateType"))) {
-#ifdef DEBUG
if (debug & DEBUG_OTHER) {
cout << "DEBUG(FOUND): Entering verse" << endl;
}
-#endif
+
if (inChapterHeader) {
SWBuf heading = text;
text = "";
if (heading.length()) {
-#ifdef DEBUG
if (debug & DEBUG_TITLE) {
cout << "DEBUG(TITLE): " << currentOsisID << ": CHAPTER HEADING "<< heading.c_str() << endl;
}
-#endif
+
writeEntry(heading);
}
@@ -772,7 +758,7 @@
// Did we have pre-verse material that needs to be marked?
if (inPreVerse) {
char genBuf[200];
- sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" eID=\"pv%d\"/>", genID++);
+ sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" eID=\"pv%d\"/>", genID);
text.append(genBuf);
}
@@ -806,13 +792,13 @@
}
strcpy(currentOsisID, currentVerse.getOSISRef());
-#ifdef DEBUG
+
if (debug & DEBUG_OTHER) {
cout << "DEBUG(FOUND): New current verse is " << currentVerse.getOSISRef() << endl;
cout << "DEBUG(FOUND): osisID/annotateRef is adjusted to: " << keyVal << endl;
}
-#endif
+ sidVerse = token.getAttribute("sID");
inVerse = true;
inPreVerse = false;
inBookHeader = false;
@@ -823,7 +809,6 @@
if (strcmp(tokenName, "verse")) {
text.append(token);
}
-#ifdef DEBUG
else if (debug & DEBUG_VERSE)
{
// transform the verse into a milestone
@@ -836,7 +821,6 @@
}
text.append(t);
}
-#endif
if (inWOC) {
text.append(wocTag);
@@ -852,11 +836,11 @@
// Otherwise have to do it here
if (!strcmp(tokenName, "q")) {
quoteStack.push(token);
-#ifdef DEBUG
+
if (debug & DEBUG_QUOTE) {
cout << "DEBUG(QUOTE): " << currentOsisID << ": quote top(" << quoteStack.size() << ") " << token << endl;
}
-#endif
+
if (token.getAttribute("who") && !strcmp(token.getAttribute("who"), "Jesus")) {
inWOC = true;
@@ -916,18 +900,16 @@
if (inPreVerse) {
char genBuf[200];
- sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" sID=\"pv%d\"/>", genID);
+ sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" sID=\"pv%d\"/>", genID++);
text.append(genBuf);
}
}
-#ifdef DEBUG
if (debug & DEBUG_INTERVERSE) {
if (!inVerse && !inBookHeader && !inChapterHeader) {
cout << "DEBUG(INTERVERSE): " << currentOsisID << ": interverse start token " << token << ":" << text.c_str() << endl;
}
}
-#endif
return false;
} // Done with procesing start and empty tags
@@ -944,11 +926,11 @@
if (!token.isEmpty()) {
XMLTag topToken = tagStack.top();
tagDepth = tagStack.size();
-#ifdef DEBUG
+
if (debug & DEBUG_STACK) {
cout << "DEBUG(STACK): " << currentOsisID << ": pop(" << tagDepth << ") " << topToken.getName() << endl;
}
-#endif
+
tagStack.pop();
if (strcmp(topToken.getName(), tokenName)) {
@@ -966,7 +948,7 @@
}
// VERSE and COMMENTARY END
- if (!strcmp(tokenName, "verse") || (inVerse && !strcmp(tokenName, "div"))) {
+ if ((!strcmp(tokenName, "verse")) || ((!strcmp(tokenName, "div")) && (eidAttr == sidVerse))) {
if (tagDepth != verseDepth) {
cout << "WARNING(NESTING): verse " << currentOsisID << " is not well formed:(" << verseDepth << "," << tagDepth << ")" << endl;
@@ -982,7 +964,6 @@
if (strcmp(tokenName, "verse")) {
text.append(token);
}
-#ifdef DEBUG
else if (debug & DEBUG_VERSE)
{
// transform the verse into a milestone
@@ -995,7 +976,6 @@
}
text.append(t);
}
-#endif
writeEntry(text);
@@ -1011,21 +991,21 @@
// Otherwise have to manage it here
if (!strcmp(tokenName, "q")) {
XMLTag topToken = quoteStack.top();
-#ifdef DEBUG
+
if (debug & DEBUG_QUOTE) {
cout << "DEBUG(QUOTE): " << currentOsisID << ": quote pop(" << quoteStack.size() << ") " << topToken << " -- " << token << endl;
}
-#endif
+
quoteStack.pop();
// If we have found an end tag for a <q who="Jesus"> then we are done with the WOC
// and we need to terminate the <q who="Jesus" marker=""> that was added earlier in the verse.
if (token.getAttribute("who") && !strcmp(token.getAttribute("who"), "Jesus")) {
-#ifdef DEBUG
+
if (debug & DEBUG_QUOTE) {
cout << "DEBUG(QUOTE): " << currentOsisID << ": (" << quoteStack.size() << ") " << topToken << " -- " << token << endl;
}
-#endif
+
inWOC = false;
const char *sID = topToken.getAttribute("sID");
const char *eID = token.getAttribute("eID");
@@ -1061,17 +1041,18 @@
// Also for material that goes with last entry
if (!inVerse && !inBookHeader && !inChapterHeader) {
// Is this the end of a chapter.
- if (tagDepth == chapterDepth && (!strcmp(tokenName, "div") || !strcmp(tokenName, "chapter"))) {
+ if (((!strcmp(tokenName, "div")) && (eidAttr == sidChapter)) || (!strcmp(tokenName, "chapter"))) {
text.append(token);
writeEntry(text);
inChapter = false;
+ sidChapter = "";
chapterDepth = 0;
verseDepth = 0;
return true;
}
// Is it the end of a book
- if (tagDepth == bookDepth && (!strcmp(tokenName, "div"))) {
+ if ((!strcmp(tokenName, "div")) && (eidAttr == sidBook)) {
text.append(token);
writeEntry(text);
bookDepth = 0;
@@ -1093,21 +1074,19 @@
if (!inPreVerse) {
text.append(token);
writeEntry(text);
-#ifdef DEBUG
+
if (debug & DEBUG_INTERVERSE) {
cout << "DEBUG(INTERVERSE): " << currentOsisID << ": appending interverse end tag: " << tokenName << "(" << tagDepth << "," << chapterDepth << "," << bookDepth << ")" << endl;
}
-#endif
+
return true;
}
-#ifdef DEBUG
if (debug & DEBUG_INTERVERSE) {
cout << "DEBUG(INTERVERSE): " << currentOsisID << ": interverse end tag: " << tokenName << "(" << tagDepth << "," << chapterDepth << "," << bookDepth << ")" << endl;
}
-#endif
- return false;
+ return false;
}
return false;
@@ -1135,11 +1114,11 @@
// Support simplification transformations
if (t.isEmpty()) {
-#ifdef DEBUG
+
if (debug & DEBUG_XFORM) {
cout << "DEBUG(XFORM): " << currentOsisID << ": xform empty " << t << endl;
}
-#endif
+
return t;
}
@@ -1175,21 +1154,20 @@
t.setAttribute("sID", buf);
}
bspTagStack.push(t);
-#ifdef DEBUG
+
if (debug & DEBUG_XFORM) {
cout << "DEBUG(XFORM): " << currentOsisID << ": xform push (" << bspTagStack.size() << ") " << t << " (tagname=" << tagName << ")" << endl;
XMLTag topToken = bspTagStack.top();
cout << "DEBUG(XFORM): " << currentOsisID << ": xform top(" << bspTagStack.size() << ") " << topToken << endl;
}
-#endif
}
else {
XMLTag topToken = bspTagStack.top();
-#ifdef DEBUG
+
if (debug & DEBUG_XFORM) {
cout << "DEBUG(XFORM): " << currentOsisID << ": xform pop(" << bspTagStack.size() << ") " << topToken << endl;
}
-#endif
+
bspTagStack.pop();
// Look for the milestoneable container tags handled above.
@@ -1287,8 +1265,7 @@
StringList av11n = vmgr->getVersificationSystems();
for (StringList::iterator loop = av11n.begin(); loop != av11n.end(); loop++) {
fprintf(stderr, "\t\t\t\t\t%s\n", (*loop).c_str());
- }
-#ifdef DEBUG
+ }
fprintf(stderr, " -d <flags>\t\t turn on debugging (default is 0)\n");
fprintf(stderr, "\t\t\t\t Note: This flag may change in the future.\n");
fprintf(stderr, "\t\t\t\t Flags: The following are valid values:\n");
@@ -1304,7 +1281,6 @@
fprintf(stderr, "\t\t\t\t\t256 - internal stack\n");
fprintf(stderr, "\t\t\t\t\t512 - miscellaneous\n");
fprintf(stderr, "\t\t\t\t This flag can be used more than once.\n");
-#endif
fprintf(stderr, "\n");
fprintf(stderr, "See http://www.crosswire.org/wiki/osis2mod for more details.\n");
fprintf(stderr, "\n");
@@ -1453,32 +1429,30 @@
}
else if (!strcmp(argv[i], "-s")) {
if (compType.size()) usage(*argv, "Cannot specify -s and -z or -Z");
- if (i+1 < argc) {
- entrySize = atoi(argv[++i]);
- if (entrySize == 2 || entrySize == 4) {
- continue;
- }
- }
- usage(*argv, "-s requires one of <2|4>");
+ if (i+1 < argc) {
+ entrySize = atoi(argv[++i]);
+ if (entrySize == 2 || entrySize == 4) {
+ continue;
+ }
+ }
+ usage(*argv, "-s requires one of <2|4>");
}
else if (!strcmp(argv[i], "-C")) {
isCommentary = true;
}
-#ifdef DEBUG
else if (!strcmp(argv[i], "-d")) {
if (i+1 < argc) debug |= atoi(argv[++i]);
else usage(*argv, "-d requires <flags>");
}
-#endif
else usage(*argv, (((SWBuf)"Unknown argument: ")+ argv[i]).c_str());
}
- if (compType == "ZIP") {
- compressor = new ZipCompress();
- }
- else if (compType == "LZSS") {
- compressor = new LZSSCompress();
- }
+ if (compType == "ZIP") {
+ compressor = new ZipCompress();
+ }
+ else if (compType == "LZSS") {
+ compressor = new LZSSCompress();
+ }
#ifndef _ICU_
if (normalize) {
@@ -1487,11 +1461,9 @@
}
#endif
-#ifdef DEBUG
if (debug & DEBUG_OTHER) {
cout << "DEBUG(ARGS):\n\tpath: " << path << "\n\tosisDoc: " << osisDoc << "\n\tcreate: " << append << "\n\tcompressType: " << compType << "\n\tblockType: " << iType << "\n\tcipherKey: " << cipherKey.c_str() << "\n\tnormalize: " << normalize << endl;
}
-#endif
if (!append) { // == 0 then create module
// Try to initialize a default set of datafiles and indicies at our
@@ -1532,7 +1504,7 @@
FMT_UNKNOWN, // markup
0, // lang
v11n // versification
- );
+ );
}
else if (entrySize == 4) {
// Create a raw text module allowing very large entries
More information about the sword-cvs
mailing list