up to the end

or and is treated as part of its heading // Otherwise if it a title in a chapter before the first the first verse it // is put into the verse as a preverse title. if (!isEndTag && titleDepth == 0 && (!strcmp(tokenName, "title")) && (!typeAttr || (strcmp(typeAttr, "main") && strcmp(typeAttr, "chapter")))) { titleOffset = text.length(); //start of the title tag lastTitle = ""; inTitle = true; tagStack.push(token); // cout << "push " << token->getName() << endl; titleDepth = tagStack.size(); return false; } // Check titleDepth since titles can be nested. Don't want to quit too early. else if (isEndTag && tagDepth == titleDepth && (!strcmp(tokenName, "title"))) { lastTitle.append(text.c_str() + titleOffset); // up to the end lastTitle.append(*token); // // cout << "lastTitle: " << lastTitle.c_str() << endl; // cout << "text-lastTitle: " << text.c_str()+titleOffset << endl; // cout << "text: " << text.c_str() << endl; inTitle = false; titleDepth = 0; tagStack.pop(); return false; // don't add to the text itself } //-- START TAG ------------------------------------------------------------------------- if (!isEndTag) { // Remember non-empty start tags if (!token->isEmpty()) { tagStack.push(token); // cout << "push " << token->getName() << endl; } //-- WITH OSIS ID ------------------------------------------------------------------------- if (token->getAttribute("osisID")) { // BOOK START if ((!strcmp(tokenName, "div")) && (!strcmp(typeAttr, "book"))) { inVerse = false; if (inBookHeader || inChapterHeader) { // this one should never happen, but just in case // cout << "HEADING "; currentVerse->Testament(0); currentVerse->Book(0); currentVerse->Chapter(0); currentVerse->Verse(0); writeEntry(*currentVerse, text); } *currentVerse = token->getAttribute("osisID"); currentVerse->Chapter(0); currentVerse->Verse(0); inBookHeader = true; inChapterHeader = false; lastTitle = ""; text = ""; bookDepth = tagStack.size(); chapterDepth = 0; verseDepth = 0; return true; } // CHAPTER START else if (((!strcmp(tokenName, "div")) && (!strcmp(typeAttr, "chapter"))) || (!strcmp(tokenName, "chapter")) ) { inVerse = false; if (inBookHeader) { // cout << "BOOK HEADING "<< text.c_str() << endl; writeEntry(*currentVerse, text); } *currentVerse = token->getAttribute("osisID"); currentVerse->Verse(0); inBookHeader = false; inChapterHeader = true; lastTitle = ""; text = ""; chapterDepth = tagStack.size(); verseDepth = 0; return true; } // VERSE START else if (!strcmp(tokenName, "verse")) { inVerse = true; if (inChapterHeader) { SWBuf heading = text; //make sure we don't insert the preverse title which belongs to the first verse of this chapter! // Did we have a preverse title? if (lastTitle.length()) { //Was the preVerse title in the header (error if not)? const char* header = heading.c_str(); const char* preVerse = strstr(header, lastTitle); if (preVerse) { if (preVerse == header) { heading = ""; // do nothing } else { // remove everything before the title from the beginning. text = preVerse; // Remove text from the end of the header. heading.setSize(preVerse - header); } } else { cout << "Warning: Bug in code. Could not find title." << endl; } } else { text = ""; } if (heading.length()) { // cout << "CHAPTER HEADING "<< heading.c_str() << endl; writeEntry(*currentVerse, heading); } inChapterHeader = false; } SWBuf keyVal = token->getAttribute("osisID"); deleteSubverses(keyVal); // turn "Mat.1.1 Mat.1.2" into "Mat.1.1; Mat.1.2" bool skipSpace = false; for (int i = 0; keyVal[i]; i++) { if (keyVal[i] == ' ') { if (!skipSpace) { keyVal[i] = ';'; skipSpace = true; } } else skipSpace = false; } lastVerseIDs = currentVerse->ParseVerseList(keyVal); if (lastVerseIDs.Count()) { *currentVerse = lastVerseIDs.getElement(0)->getText(); } verseDepth = tagStack.size(); return true; } } // Handle stuff between the verses // Whitespace producing empty tokens are appended to prior entry // Also the quote // This is a hack to get ESV to work else if (!inTitle && !inVerse && token->isEmpty()) { // && !inBookHeader && !inChapterHeader) { if (!strcmp(tokenName, "p") || !strcmp(tokenName, "div") || !strcmp(tokenName, "q") || !strcmp(tokenName, "l") || !strcmp(tokenName, "lb") || !strcmp(tokenName, "lg") ) { // if (token) { // cout << "start token " << *token << ":" << text.c_str() << endl; // } SWBuf tmp = token->toString(); writeEntry(*currentVerse, tmp); return true; } } } //-- END TAG --------------------------------------------------------------------------------------------- else { if (tagStack.empty()) { cout << "tag expected" << endl; exit(1); } XMLTag* topToken = 0; if (!token->isEmpty()) { topToken = tagStack.top(); tagDepth = tagStack.size(); // cout << "pop " << topToken->getName() << endl; tagStack.pop(); if (strcmp(topToken->getName(), tokenName)) { cout << "Error: " << *currentVerse << ": Expected " << topToken->getName() << " found " << tokenName << endl; exit(1); } } // VERSE END if (!strcmp(tokenName, "verse")) { inVerse = false; if (tagDepth != verseDepth) { cout << "Warning verse " << *currentVerse << " is not well formed:(" << verseDepth << "," << tagDepth << ")" << endl; } if (lastTitle.length()) { const char* end = strchr(lastTitle, '>'); // cout << lastTitle << endl; // cout << "length=" << int(end+1 - lastTitle.c_str()) << ", tag:" << lastTitle.c_str() << endl; SWBuf titleTagText; titleTagText.append(lastTitle.c_str(), end+1 - lastTitle.c_str()); // cout << "tagText: " << titleTagText.c_str() << endl;; XMLTag titleTag(titleTagText); titleTag.setAttribute("type", "section"); titleTag.setAttribute("subType", "x-preverse"); //we insert the title into the text again - make sure to remove the old title text const char* pos = strstr(text, lastTitle); if (pos) { SWBuf temp; temp.append(text, pos-text.c_str()); temp.append(pos+lastTitle.length()); text = temp; } //if a title was already inserted at the beginning insert this one after that first title int titlePos = 0; if (!strncmp(text.c_str(),""); if (tmp) { titlePos = (tmp-text.c_str()) + 8; } } text.insert(titlePos, end+1); text.insert(titlePos, titleTag); } // text += token; writeEntry(*currentVerse, text); // If we found an osisID like osisID="Gen.1.1 Gen.1.2 Gen.1.3" we have to link Gen.1.2 and Gen.1.3 to Gen.1.1 VerseKey dest = *currentVerse; for (int i = 0; i < lastVerseIDs.Count(); ++i) { VerseKey linkKey; linkKey.AutoNormalize(0); linkKey.Headings(1); // turn on mod/testmnt/book/chap headings linkKey.Persist(1); linkKey = lastVerseIDs.getElement(i)->getText(); if (linkKey.Verse() != currentVerse->Verse() || linkKey.Chapter() != currentVerse->Chapter() || linkKey.Book() != currentVerse->Book() || linkKey.Testament() != currentVerse->Testament()) { *currentVerse = linkKey; linkToEntry(dest); } } lastTitle = ""; text = ""; verseDepth = 0; return true; } else if (!inTitle && !inVerse && !inBookHeader && !inChapterHeader) { // cout << "End tag not in verse: " << tokenName << "(" << tagDepth << "," << chapterDepth << "," << bookDepth << ")" << endl; // Is this the end of a chapter. if (tagDepth == chapterDepth && (!strcmp(tokenName, "div") || !strcmp(tokenName, "chapter"))) { chapterDepth = 0; verseDepth = 0; text = ""; return true; } // Or is it the end of a book else if (tagDepth == bookDepth && (!strcmp(tokenName, "div"))) { bookDepth = 0; chapterDepth = 0; verseDepth = 0; text = ""; return true; } // OTHER MISC END TAGS WHEN !INVERSE // Test that is between verses, or after the last is appended to the preceeding verse. else if (!strcmp(tokenName, "p") || !strcmp(tokenName, "div") || !strcmp(tokenName, "q") || !strcmp(tokenName, "l") || !strcmp(tokenName, "lb") || !strcmp(tokenName, "lg") ) { text.append(*token); writeEntry(*currentVerse, text); text = ""; return true; } } } return false; } XMLTag* transform(XMLTag* t) { static std::stack<XMLTag*> tagStack; static int sID = 1; char buf[11]; // Support simplification transformations if (!t->isEmpty()) { if (!t->isEndTag()) { tagStack.push(t); // Transform <q> into <q sID=""/> except for <q who="Jesus"> if ((!strcmp(t->getName(), "q")) && (!t->getAttribute("who") || strcmp(t->getAttribute("who"), "Jesus"))) { t->setEmpty(true); sprintf(buf, "q%d", sID++); t->setAttribute("sID", buf); } // Transform <p> into <lb type="x-begin-paragraph"/> else if (!strcmp(t->getName(), "p")) { // note there is no process that should care about type, it is there for reversability t->setText("<lb type=\"x-begin-paragraph\" />"); } } else { XMLTag *topToken = tagStack.top(); tagStack.pop(); // If we have found an end tag for a <q> that was transformed then transform this one as well. if ((!strcmp(t->getName(), "q")) && (!strcmp(topToken->getName(), "q")) && (!topToken->getAttribute("who") || strcmp(topToken->getAttribute("who"), "Jesus"))) { // make this a clone of the start tag with sID changed to eID *t = *topToken; t->setAttribute("eID", t->getAttribute("sID")); t->setAttribute("sID", 0); } // Look for paragraph tags. // If we have found an end tag for a <p> that was transformed then transform this as well. else if ((!strcmp(t->getName(), "p")) && (!strcmp(topToken->getName(), "lb"))) { t->setText("<lb type=\"x-end-paragraph\" />"); } } } return t; } int main(int argc, char **argv) { // Let's test our command line arguments if (argc < 3) { fprintf(stderr, "\nusage: osis2mod <output/path> <osisDoc> [createMod] [compressType [blockType [cipherKey]]]\n"); fprintf(stderr, " createMod : (default 0): 0 - create 1 - augment\n"); fprintf(stderr, " compressType: (default 0): 0 - no compression 1 - LZSS 2 - Zip\n"); fprintf(stderr, " blockType : (default 4): 2 - verses 3 - chapters 4 - books\n"); fprintf(stderr, " cipherkey : ascii string for module encryption\n"); exit(-1); } int iType = 4; int compType = 0; string cipherKey = ""; SWCompress *compressor = 0; // SWModule *outModule = 0; if (argc > 4) { compType = atoi(argv[4]); if (argc > 5) { iType = atoi(argv[5]); if (argc > 6) { cipherKey = argv[6]; } } } switch (compType) { // these are deleted by zText case 0: break; case 1: compressor = new LZSSCompress(); break; case 2: compressor = new ZipCompress(); break; } // cout << "path: " << argv[1] << " osisDoc: " << argv[2] << " create: " << argv[3] << " compressType: " << compType << " blockType: " << iType << " cipherKey: " << cipherKey.c_str() << "\n"; // cout << ""; // exit(-3); if ((argc<4)||(!strcmp(argv[3], "0"))) { // == 0 then create module // Try to initialize a default set of datafiles and indicies at our // datapath location passed to us from the user. if ( compressor ){ if ( zText::createModule(argv[1], iType) ){ fprintf(stderr, "error: %s: couldn't create module at path: %s \n", argv[0], argv[1]); exit(-3); } } else if (RawText::createModule(argv[1])) { fprintf(stderr, "error: %s: couldn't create module at path: %s \n", argv[0], argv[1]); exit(-3); } } // Let's see if we can open our input file FileDesc *fd = FileMgr::getSystemFileMgr()->open(argv[2], FileMgr::RDONLY); if (fd->getFd() < 0) { fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[2]); exit(-2); } // Do some initialization stuff SWBuf buffer; if (compressor){ module = new zText(argv[1], 0, 0, iType, compressor); } else{ module = new RawText(argv[1]); // open our datapath with our RawText driver. } SWFilter *cipherFilter = 0; if (!cipherKey.empty()){ fprintf(stderr, "Adding cipher filter with phrase: %s\n", cipherKey.c_str() ); cipherFilter = new CipherFilter(cipherKey.c_str()); module->AddRawFilter(cipherFilter); } if (!module->isWritable()) { fprintf(stderr, "The module is not writable. Writing text to it will not work.\nExiting.\n" ); exit(-1); } activeOsisID[0] = '\0'; currentVerse = new VerseKey(); currentVerse->AutoNormalize(0); currentVerse->Headings(1); // turn on mod/testmnt/book/chap headings currentVerse->Persist(1); module->setKey(*currentVerse); (*module) = TOP; const char *from; SWBuf token; SWBuf text; bool intoken = false; while (FileMgr::getLine(fd, buffer)) { //cout << "Line: " << buffer.c_str() << endl; for (from = buffer.c_str(); *from; from++) { if (*from == '<') { intoken = true; token = "<"; continue; } if (*from == '>') { intoken = false; token.append('>'); // take this isalpha if out to check for bugs in text if ((isalpha(token[1])) || (isalpha(token[2]))) { //cout << "Handle:" << token.c_str() << endl; XMLTag *t = new XMLTag(token.c_str()); if (!handleToken(text, transform(t))) { text.append(*t); } } continue; } if (intoken) token.append(*from); else text.append(*from); } if (intoken) token.append("\n"); } // Force the last entry from the buffer. text = ""; writeEntry(*currentVerse, text, true); delete module; delete currentVerse; if (cipherFilter) delete cipherFilter; FileMgr::getSystemFileMgr()->close(fd); }