[sword-svn] r1788 - in trunk: . utilities
scribe at crosswire.org
scribe at crosswire.org
Sat Apr 30 23:41:20 MST 2005
Author: scribe
Date: 2005-04-30 23:41:19 -0700 (Sat, 30 Apr 2005)
New Revision: 1788
Modified:
trunk/usrinst.sh
trunk/utilities/mod2osis.cpp
trunk/utilities/osis2mod.cpp
Log:
Getting closer. Can't figure out why no <w> tags when exporting WHAC
Modified: trunk/usrinst.sh
===================================================================
--- trunk/usrinst.sh 2005-04-30 20:09:03 UTC (rev 1787)
+++ trunk/usrinst.sh 2005-05-01 06:41:19 UTC (rev 1788)
@@ -6,8 +6,8 @@
OPTIONS="--sysconfdir=/etc $OPTIONS"
#OPTIONS="--with-icu $OPTIONS"
#OPTIONS="--with-vcl $OPTIONS"
-#OPTIONS="--enable-debug $OPTIONS"
-OPTIONS="--enable-profile $OPTIONS"
+OPTIONS="--enable-debug $OPTIONS"
+#OPTIONS="--enable-profile $OPTIONS"
#OPTIONS="--with-lucene $OPTIONS"
#OPTIONS="--without-curl $OPTIONS"
#OPTIONS="--enable-tests $OPTIONS"
Modified: trunk/utilities/mod2osis.cpp
===================================================================
--- trunk/utilities/mod2osis.cpp 2005-04-30 20:09:03 UTC (rev 1787)
+++ trunk/utilities/mod2osis.cpp 2005-05-01 06:41:19 UTC (rev 1788)
@@ -186,17 +186,8 @@
delete [] buf;
openchap = true;
}
- /*
- char *text = inModule->getRawEntry();
- int len = (inModule->getEntrySize() + 1) * 15;
- buf = new char [ len ];
- memset(buf, 0, len);
- memcpy(buf, inModule->getRawEntry(), inModule->getEntrySize());
- filter.ProcessText(buf, len - 3, vkey, inModule);
- cout << buf << endl;
- */
-// cout << "<verse osisID=\"" << vkey->getOSISRef() << "\">" << inModule->RenderText() << "</verse>" << endl;
- cout << inModule->RenderText() << endl;
+ SWBuf verseText = inModule->RenderText();
+ cout << verseText.c_str() << endl;
lastChap = vkey->Chapter();
lastBook = vkey->Book();
lastTest = vkey->Testament();
Modified: trunk/utilities/osis2mod.cpp
===================================================================
--- trunk/utilities/osis2mod.cpp 2005-04-30 20:09:03 UTC (rev 1787)
+++ trunk/utilities/osis2mod.cpp 2005-05-01 06:41:19 UTC (rev 1788)
@@ -12,6 +12,7 @@
#endif
#include <utilstr.h>
+#include <filemgr.h>
#include <swmgr.h>
#include <rawtext.h>
#include <iostream>
@@ -32,53 +33,9 @@
RawText *module;
VerseKey *currentVerse = 0;
-char readline(int fd, char **buf) {
- char ch;
- if (*buf)
- delete [] *buf;
- *buf = 0;
- int len;
-
- long index = lseek(fd, 0, SEEK_CUR);
- // clean up any preceding white space
- while ((len = read(fd, &ch, 1)) == 1) {
- if ((ch != 13) && (ch != ' ') && (ch != '\t'))
- break;
- else index++;
- }
-
-
- while (ch != 10) {
- if ((len = read(fd, &ch, 1)) != 1)
- break;
- }
-
- int size = (lseek(fd, 0, SEEK_CUR) - index) - 1;
-
- *buf = new char [ size + 1 ];
-
- if (size > 0) {
- lseek(fd, index, SEEK_SET);
- read(fd, *buf, size);
- read(fd, &ch, 1); //pop terminating char
- (*buf)[size] = 0;
-
- // clean up any trailing junk on buf
- for (char *it = *buf+(strlen(*buf)-1); it > *buf; it--) {
- if ((*it != 10) && (*it != 13) && (*it != ' ') && (*it != '\t'))
- break;
- else *it = 0;
- }
- }
- else **buf = 0;
- return !len;
-}
-
-
-char* deleteSubverses(char *buf) {
- // remove subverse elements from osisIDs
- // (this is a hack and should be handled better with VerseKey2)
+// remove subverse elements from osisIDs
+void deleteSubverses(SWBuf &buf) {
for (int i = 0; buf[i]; i++) {
if (buf[i] == '!') {
while (buf[i] && buf[i] != ' ') {
@@ -88,7 +45,6 @@
i--;
}
}
- return buf;
}
@@ -108,6 +64,7 @@
else return true; // no check if we're a heading... Probably bad.
}
+
void makeKJVRef(VerseKey &key) {
cout << "re-versified " << key;
// cout << "\tC" << (int)(key.builtin_books[key.Testament()-1][key.Book()-1].chapmax) << ":V" << (int)(key.builtin_books[key.Testament()-1][key.Book()-1].versemax[key.Chapter()-1]);
@@ -121,6 +78,7 @@
cout << "\tas " << key << endl;
}
+
void writeEntry(VerseKey &key, SWBuf &text, bool suppressOutput = false) {
// cout << "Verse: " << key << "\n";
// cout << "TEXT: " << text << "\n\n";
@@ -145,6 +103,7 @@
key = saveKey;
}
+
void linkToEntry(VerseKey& dest) {
// cout << "Verse: " << key << "\n";
// cout << "TEXT: " << text << "\n\n";
@@ -190,35 +149,38 @@
return false; //don't add </title> to the text itself
}
- // BOOK START
- if (((!strcmp(token.getName(), "div")) && (!token.isEndTag() && !(token.getAttribute("eID"))) && (token.getAttribute("osisID"))) && (!strcmp(token.getAttribute("type"), "book"))) {
- inVerse = false;
- if (inHeader) { // this one should never happen, but just in case
-// cout << "HEADING ";
- currentVerse->Testament(0);
- currentVerse->Book(0);
- currentVerse->Chapter(0);
- currentVerse->Verse(0);
- writeEntry(*currentVerse, text);
- inHeader = false;
- }
- *currentVerse = token.getAttribute("osisID");
- currentVerse->Chapter(0);
- currentVerse->Verse(0);
- inHeader = true;
- headerType = "book";
- lastTitle = "";
- text = "";
- return true;
- }
+//-- START TAG WITH OSIS ID -------------------------------------------------------------------------
- // START TAG WITH OSIS ID
- else if ((!token.isEndTag()) && (!token.getAttribute("eID")) && (token.getAttribute("osisID")))
+ if ((!token.isEndTag()) && (!token.getAttribute("eID")) && (token.getAttribute("osisID"))) {
+
+ // BOOK START
+ if ((!strcmp(token.getName(), "div")) && (!strcmp(token.getAttribute("type"), "book"))) {
+ inVerse = false;
+ if (inHeader) { // this one should never happen, but just in case
+ // cout << "HEADING ";
+ currentVerse->Testament(0);
+ currentVerse->Book(0);
+ currentVerse->Chapter(0);
+ currentVerse->Verse(0);
+ writeEntry(*currentVerse, text);
+ inHeader = false;
+ }
+ *currentVerse = token.getAttribute("osisID");
+ currentVerse->Chapter(0);
+ currentVerse->Verse(0);
+ inHeader = true;
+ headerType = "book";
+ lastTitle = "";
+ text = "";
+
+ return true;
+ }
+
// CHAPTER START
- if (((!strcmp(token.getName(), "div")) && (!strcmp(token.getAttribute("type"), "chapter")))
+ else if (((!strcmp(token.getName(), "div")) && (!strcmp(token.getAttribute("type"), "chapter")))
|| (!strcmp(token.getName(), "chapter"))
) {
inVerse = false;
@@ -266,98 +228,112 @@
inHeader = false;
}
- char *subverseBuf = 0;
- stdstr(&subverseBuf, token.getAttribute("osisID"));
- deleteSubverses(subverseBuf);
- *currentVerse = subverseBuf;
+ SWBuf keyVal = token.getAttribute("osisID");
+ deleteSubverses(keyVal);
- char *pos = 0;
- while ((pos = strchr(pos, ' '))) {
- *pos = ';';
- }
+ // turn "Mat.1.1 Mat.1.2" into "Mat.1.1; Mat.1.2"
+ bool skipSpace = false;
+ for (int i = 0; keyVal[i]; i++) {
+ if (keyVal[i] == ' ') {
+ if (!skipSpace) {
+ keyVal[i] = ';';
+ skipSpace = true;
+ }
+ }
+ else skipSpace = false;
+ }
- //cout << "set the list\n" << token.getAttribute("osisID");
- lastVerseIDs = currentVerse->ParseVerseList(subverseBuf);
-// if (lastVerseIDs.Count() > 1)
-// cout << "count is" << lastVerseIDs.Count();
+ lastVerseIDs = currentVerse->ParseVerseList(keyVal);
+ if (lastVerseIDs.Count())
+ *currentVerse = lastVerseIDs.getElement(0)->getText();
- if (lastVerseIDs.Count())
- *currentVerse = lastVerseIDs.getElement(0)->getText();
+ return true;
+ }
+ }
-// text.append(token);
- return true;
- }
+//-- END TAG ---------------------------------------------------------------------------------------------
- // VERSE END
- else if ((!strcmp(token.getName(), "verse")) && (token.isEndTag() || (token.getAttribute("eID")))) {
- inVerse = false;
- if (lastTitle.length()) {
- const char* end = strchr(lastTitle, '>');
-// printf("length=%d, tag; %s\n", end+1 - lastTitle.c_str(), lastTitle.c_str());
+ else if ((token.isEndTag()) || (token.getAttribute("eID"))) {
- SWBuf titleTagText;
- titleTagText.append(lastTitle.c_str(), end+1 - lastTitle.c_str());
-// printf("tagText: %s\n", titleTagText.c_str());
+ // VERSE END
+ if (!strcmp(token.getName(), "verse")) {
+ inVerse = false;
+ if (lastTitle.length()) {
+ const char* end = strchr(lastTitle, '>');
+ // printf("length=%d, tag; %s\n", end+1 - lastTitle.c_str(), lastTitle.c_str());
- XMLTag titleTag(titleTagText);
- titleTag.setAttribute("type", "section");
- titleTag.setAttribute("subtype", "x-preverse");
+ SWBuf titleTagText;
+ titleTagText.append(lastTitle.c_str(), end+1 - lastTitle.c_str());
+ // printf("tagText: %s\n", titleTagText.c_str());
- //we insert the title into the text again - make sure to remove the old title text
- const char* pos = strstr(text, lastTitle);
- if (pos) {
- SWBuf temp;
- temp.append(text, pos-text.c_str());
- temp.append(pos+lastTitle.length());
- text = temp;
- }
+ XMLTag titleTag(titleTagText);
+ titleTag.setAttribute("type", "section");
+ titleTag.setAttribute("subtype", "x-preverse");
+
+ //we insert the title into the text again - make sure to remove the old title text
+ const char* pos = strstr(text, lastTitle);
+ if (pos) {
+ SWBuf temp;
+ temp.append(text, pos-text.c_str());
+ temp.append(pos+lastTitle.length());
+ text = temp;
+ }
- //if a title was already inserted at the beginning insert this one after that first title
- int titlePos = 0;
- if (!strncmp(text.c_str(),"<title ",7)) {
- const char* tmp = strstr(text.c_str(), "</title>");
- if (tmp) {
- titlePos = (tmp-text.c_str()) + 8;
+ //if a title was already inserted at the beginning insert this one after that first title
+ int titlePos = 0;
+ if (!strncmp(text.c_str(),"<title ",7)) {
+ const char* tmp = strstr(text.c_str(), "</title>");
+ if (tmp) {
+ titlePos = (tmp-text.c_str()) + 8;
+ }
}
+ text.insert(titlePos, end+1);
+ text.insert(titlePos, titleTag);
}
- text.insert(titlePos, end+1);
- text.insert(titlePos, titleTag);
- }
-// text += token;
- writeEntry(*currentVerse, text);
+ // text += token;
+ writeEntry(*currentVerse, text);
- // If we found an osisID like osisID="Gen.1.1 Gen.1.2 Gen.1.3" we have to link Gen.1.2 and Gen.1.3 to Gen.1.1
- VerseKey dest = *currentVerse;
- for (int i = 0; i < lastVerseIDs.Count(); ++i) {
- VerseKey linkKey;
- linkKey.AutoNormalize(0);
- linkKey.Headings(1); // turn on mod/testmnt/book/chap headings
- linkKey.Persist(1);
- linkKey = lastVerseIDs.getElement(i)->getText();
+ // If we found an osisID like osisID="Gen.1.1 Gen.1.2 Gen.1.3" we have to link Gen.1.2 and Gen.1.3 to Gen.1.1
+ VerseKey dest = *currentVerse;
+ for (int i = 0; i < lastVerseIDs.Count(); ++i) {
+ VerseKey linkKey;
+ linkKey.AutoNormalize(0);
+ linkKey.Headings(1); // turn on mod/testmnt/book/chap headings
+ linkKey.Persist(1);
+ linkKey = lastVerseIDs.getElement(i)->getText();
- if (linkKey.Verse() != currentVerse->Verse() || linkKey.Chapter() != currentVerse->Chapter() || linkKey.Book() != currentVerse->Book() || linkKey.Testament() != currentVerse->Testament()) {
- *currentVerse = linkKey;
- linkToEntry(dest);
+ if (linkKey.Verse() != currentVerse->Verse() || linkKey.Chapter() != currentVerse->Chapter() || linkKey.Book() != currentVerse->Book() || linkKey.Testament() != currentVerse->Testament()) {
+ *currentVerse = linkKey;
+ linkToEntry(dest);
+ }
}
- }
- lastTitle = "";
- text = "";
- return true;
- }
- else if (!inVerse && (token.isEndTag() || (token.getAttribute("eID"))) && (!strcmp(token.getName(), "p") || !strcmp(token.getName(), "div") || !strcmp(token.getName(), "q") || !strcmp(token.getName(), "l") || !strcmp(token.getName(), "lg"))) {
+ lastTitle = "";
+ text = "";
+ return true;
+ }
+
+ // OTHER MISC END TAGS WHEN !INVERSE
+ // we really should decide how to handle end tags (e.g. of a chapter). There's no way for frontends to
+ // see to what OSIS tag the end tag (which is added to the verse text!) belongs. It mixes up the rendering as a result
+ // included /div for now (jansorg)
+ else if (!inVerse &&
+ (!strcmp(token.getName(), "p") ||
+ !strcmp(token.getName(), "div") ||
+ !strcmp(token.getName(), "q") ||
+ !strcmp(token.getName(), "l") ||
+ !strcmp(token.getName(), "lg"))) {
-// we really should decide how to handle end tags /e.g. of a chapter). There's no way for frontends to
-// see to what OSIS tag the end tag (which is added to the verse text!) belongs. It mixes up the rendering as a result
-// included /div for now (jansorg)
-// else if (!inVerse && (token.isEndTag() || (token.getAttribute("eID"))) && (!strcmp(token.getName(), "p") || !strcmp(token.getName(), "q") || !strcmp(token.getName(), "l") || !strcmp(token.getName(), "lg"))) {
- text.append( token );
- writeEntry(*currentVerse, text, true);
- text = "";
- return true;
- }
+ text.append(token);
+ writeEntry(*currentVerse, text, true);
+ text = "";
+
+ return true;
+
+ }
+ }
return false;
}
@@ -368,7 +344,7 @@
// Let's test our command line arguments
if (argc < 3) {
- fprintf(stderr, "usage: %s <path/to/mod/files> <osisDoc> [0|1 - create module|augment]\n\n", argv[0]);
+ fprintf(stderr, "\nusage: %s <output/path> <osisDoc> [0 - create module (default)|1 - augment]\n\n", argv[0]);
exit(-1);
}
@@ -383,14 +359,14 @@
}
// Let's see if we can open our input file
- int fd = open(argv[2], O_RDONLY|O_BINARY);
- if (fd < 0) {
+ FileDesc *fd = FileMgr::getSystemFileMgr()->open(argv[2], O_RDONLY|O_BINARY);
+ if (fd->getFd() < 0) {
fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[2]);
exit(-2);
}
// Do some initialization stuff
- char *buffer = 0;
+ SWBuf buffer;
module = new RawText(argv[1]); // open our datapath with our RawText driver.
currentVerse = new VerseKey();
currentVerse->AutoNormalize(0);
@@ -401,13 +377,13 @@
(*module) = TOP;
- char *from;
+ const char *from;
SWBuf token;
SWBuf text;
bool intoken = false;
- while (!readline(fd, &buffer)) {
- for (from = buffer; *from; from++) {
+ while (FileMgr::getLine(fd, buffer)) {
+ for (from = buffer.c_str(); *from; from++) {
if (*from == '<') {
intoken = true;
token = "<";
@@ -427,16 +403,13 @@
}
if (intoken)
- token.append( *from );
+ token.append(*from);
else
- text.append( *from );
+ text.append(*from);
}
}
- // clear up our buffer that readline might have allocated
- if (buffer)
- delete [] buffer;
delete module;
delete currentVerse;
- close(fd);
+ FileMgr::getSystemFileMgr()->close(fd);
}
More information about the sword-cvs
mailing list