[sword-cvs] sword/utilities vpl2mod.cpp,1.20,1.21 vs2osisref.cpp,1.3,1.4
sword@www.crosswire.org
sword@www.crosswire.org
Mon, 26 May 2003 01:36:33 -0700
Update of /usr/local/cvsroot/sword/utilities
In directory www:/tmp/cvs-serv17071/utilities
Modified Files:
vpl2mod.cpp vs2osisref.cpp
Log Message:
Improved verse reference parsing to include proper
parsing of osisRef attributes and other
misc. anomalies.
Returned canon.h back to previous state to
avoid locale key mismatches
Index: vpl2mod.cpp
===================================================================
RCS file: /usr/local/cvsroot/sword/utilities/vpl2mod.cpp,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -d -r1.20 -r1.21
*** vpl2mod.cpp 26 May 2003 04:32:45 -0000 1.20
--- vpl2mod.cpp 26 May 2003 08:36:31 -0000 1.21
***************
*** 1,7 ****
! // Compression on variable granularity
#include <fcntl.h>
! #include <iostream>
! #include <fstream>
! #include <string>
#ifndef __GNUC__
--- 1,7 ----
! #include <ctype.h>
! #include <stdio.h>
#include <fcntl.h>
! #include <errno.h>
! #include <stdlib.h>
#ifndef __GNUC__
***************
*** 11,107 ****
#endif
- #include <ztext.h>
- #include <zld.h>
- #include <zcom.h>
#include <swmgr.h>
! #include <lzsscomprs.h>
! #include <zipcomprs.h>
! #include <versekey.h>
! #include <thmlosis.h>
! #include <stdio.h>
! #include <markupfiltmgr.h>
! #include <algorithm>
#ifndef NO_SWORD_NAMESPACE
! using namespace sword;
#endif
- using std::endl;
- using std::cerr;
- using std::cout;
! const char *convertToOSIS(const char *inRef, const SWKey *key) { // Wraps each verse reference parsed out of inRef in a <reference osisRef="..."> element and returns the resulting text.
! static std::string outRef; // static so the returned c_str() stays valid after return; overwritten by the next call
- outRef = "";
! VerseKey defLanguage;
! ListKey verses = defLanguage.ParseVerseList(inRef, (*key), true); // *key is presumably the context for incomplete references — TODO confirm
! const char *startFrag = inRef; // cursor into inRef: first character not yet emitted
! for (int i = 0; i < verses.Count(); i++) {
! VerseKey *element = SWDYNAMIC_CAST(VerseKey, verses.GetElement(i)); // tested below; the non-null case is emitted as a LowerBound-UpperBound range
! char buf[5120]; // NOTE(review): buf, frag and postJunk are fixed-size and written below without length checks
! char frag[800];
! char preJunk[800]; // set to empty below and not otherwise used in this function
! char postJunk[800];
! *preJunk = 0;
! *postJunk = 0;
! while ((*startFrag) && (strchr(" ;,()[].", *startFrag))) { // pass leading separator characters straight through to the output
! outRef += *startFrag;
! startFrag++;
! }
! if (element) {
! memmove(frag, startFrag, ((const char *)element->userData - startFrag) + 1); // userData is used as a pointer into inRef; copy up to and including that byte
! frag[((const char *)element->userData - startFrag) + 1] = 0;
! int j;
! for (j = strlen(frag)-1; j && (strchr(" ;,()[].", frag[j])); j--); // empty body: walk j back over trailing separator characters
! if (frag[j+1])
! strcpy(postJunk, frag+j); // NOTE(review): the else-branch below copies from frag+j+1; this starts one character earlier
! frag[j+1]=0;
! startFrag += (j+1); // NOTE(review): the else-branch advances to one past userData instead
! sprintf(buf, "<reference osisRef=\"%s-%s\">%s</reference>***%s", element->LowerBound().getOSISRef(), element->UpperBound().getOSISRef(), frag, postJunk); // NOTE(review): emits a literal "***" that the else-branch format does not
}
! else {
! memmove(frag, startFrag, ((const char *)verses.GetElement(i)->userData - startFrag) + 1); // same extraction as above, for an element that is not a VerseKey
! frag[((const char *)verses.GetElement(i)->userData - startFrag) + 1] = 0;
! int j;
! for (j = strlen(frag)-1; j && (strchr(" ;,()[].", frag[j])); j--); // empty body: walk j back over trailing separator characters
! if (frag[j+1])
! strcpy(postJunk, frag+j+1); // trailing separators are emitted after the closing tag
! frag[j+1]=0;
! startFrag += ((const char *)verses.GetElement(i)->userData - startFrag) + 1; // resume just past the byte userData points at
! sprintf(buf, "<reference osisRef=\"%s\">%s</reference>%s", VerseKey(*verses.GetElement(i)).getOSISRef(), frag, postJunk);
}
! outRef+=buf;
}
! outRef+=startFrag; // append whatever input remains after the last element
! return outRef.c_str();
}
- int main(int argc, char **argv) // Command-line driver: prints convertToOSIS(argv[1]) using argv[2], or "Gen 1:1", as the key.
- {
- if (argc < 2) {
- cerr << argv[0] << " - a tool to convert verse references from English to OSIS\n";
- cerr << "usage: "<< argv[0] << " <verse ref> [verse context]\n";
- cerr << "\n\n";
- exit(-1);
- }
- VerseKey verseKey;
- int i = strlen(argv[1]) + 1; // allocation size: length of argv[1] plus its terminator
- char * verseString = new char[i]; // NOTE(review): no matching delete [] appears in this function
- *verseString = 0;
- strcpy (verseString, argv[1]);
- verseString[i + 1] = 0; // NOTE(review): valid indexes are 0..i-1, so this stores two bytes past the allocation
-
- if (argc > 2) {
- verseKey = argv[2];
- }
- else {
- verseKey = "Gen 1:1"; // key used when no second argument is given
- }
! std::cout << convertToOSIS(verseString, &verseKey) << "\n";
! return 0;
}
--- 11,263 ----
#endif
#include <swmgr.h>
! #include <rawtext.h>
! #include <iostream>
! #include <string>
!
! #ifndef O_BINARY
! #define O_BINARY 0
! #endif
#ifndef NO_SWORD_NAMESPACE
! using sword::SWMgr;
! using sword::RawText;
! using sword::VerseKey;
! using sword::SW_POSITION;
#endif
+ using std::string;
! char readline(int fd, char **buf) { // Reads the next line from fd into a newly allocated, NUL-terminated *buf; returns nonzero when the last scanning read() returned 0.
! char ch;
! if (*buf)
! delete [] *buf; // release the buffer handed out by the previous call
! *buf = 0;
! int len;
! long index = lseek(fd, 0, SEEK_CUR); // file offset at which this line's content starts
! // clean up any preceding white space
! while ((len = read(fd, &ch, 1)) == 1) {
! if ((ch != 13) && (ch != ' ') && (ch != '\t'))
! break;
! else index++; // skipped byte: move the content start forward
! }
!
!
! while (ch != 10) { // consume bytes through the newline; NOTE(review): ch is uninitialized here if the first read() above did not return 1
! if ((len = read(fd, &ch, 1)) != 1)
! break;
! }
!
! int size = (lseek(fd, 0, SEEK_CUR) - index) - 1; // bytes consumed since the content start, minus one; NOTE(review): the -1 also applies when the scan stopped at end of file without a newline
!
! *buf = new char [ size + 1 ]; // NOTE(review): size is -1 when nothing was read, so this allocates zero bytes and the else-branch store below writes into it
!
! if (size > 0) {
! lseek(fd, index, SEEK_SET); // rewind to the content start and fetch it in one read
! read(fd, *buf, size); // return value is not checked
! read(fd, &ch, 1); //pop terminating char
! (*buf)[size] = 0;
!
! // clean up any trailing junk on buf
! for (char *it = *buf+(strlen(*buf)-1); it > *buf; it--) { // stops before the first character, which is never trimmed
! if ((*it != 10) && (*it != 13) && (*it != ' ') && (*it != '\t'))
! break;
! else *it = 0;
}
! }
! else **buf = 0; // nothing to copy: hand back an empty string
! return !len; // 1 only when the last read() in the scanning loops returned 0; a read error (-1) yields 0
! }
!
!
! char *parseVReg(char *buf) { // Splits buf in place after a leading "<letters> <digits>:<digits>" pattern; returns the text following it, or 0 if the pattern was not completed.
! char stage = 0; // how much of the pattern has been seen so far (0..4)
!
! while (*buf) {
! switch (stage) {
! case 0: // waiting for the first letter; NOTE(review): isalpha/isdigit receive a plain char here — values outside unsigned char range are undefined for them
! if (isalpha(*buf))
! stage++;
! break;
! case 1: // waiting for the first digit after the letters
! if (isdigit(*buf))
! stage++;
! break;
! case 2: // waiting for the ':' separator
! if (*buf == ':')
! stage++;
! break;
! case 3: // waiting for the first digit after the ':'
! if (isdigit(*buf))
! stage++;
! break;
! case 4: // waiting for the space that ends the reference
! if (*buf == ' ') {
! *buf = 0; // terminate the reference in place so the caller can use the start of the buffer as a key string
! return ++buf;
! }
! break;
}
! buf++;
}
! return (stage == 4) ? buf : 0; // if we got to stage 4 return after key buf, else return 0;
}
! bool isKJVRef(const char *buf) { // Parses buf into two VerseKeys, one with auto-normalize off, and reports whether they compare equal; heading positions always return true.
! VerseKey vk, test; // only vk is reconfigured below; test keeps its default settings
! vk.AutoNormalize(0);
! vk.Headings(1); // turn on mod/testmnt/book/chap headings
! vk.Persist(1);
! // lets do some tests on the verse --------------
! vk = buf;
! test = buf;
! if (vk.Testament() && vk.Book() && vk.Chapter() && vk.Verse()) { // if we're not a heading
! // std::cerr << (const char*)vk << " == " << (const char*)test << std::endl;
! return (vk == test); // presumably unequal when the default key normalizes the reference to a different verse — TODO confirm
! }
! else return true; // no check if we're a heading... Probably bad.
! }
!
!
! void fixText(char *text) { // Compacts text in place: keeps characters two at a time and drops the single space expected after each pair.
! char *to = text; // write cursor; never ahead of the read cursor
! while(*text) {
! *to++ = *text++;
! *to++ = *text++; // NOTE(review): unconditional — with an odd number of characters left this copies the terminator and moves text past it before the check below
! if (!*text)
! break;
! if (*text != ' ')
! std::cerr << "problem\n"; // unexpected character is not skipped; it becomes the first character of the next pair
! else text++;
! }
! *to = 0;
}
+ int main(int argc, char **argv) { // Imports a verse-per-line text file (argv[1]) into a RawText module created at argv[2].
+
+ // Let's test our command line arguments
+ if (argc < 2) { // NOTE(review): argv[2] is read below, but a lone argv[1] (argc == 2) passes this check
+ // fprintf(stderr, "usage: %s <vpl_file> </path/to/mod> [0|1 - file includes prepended verse references]\n", argv[0]);
+ fprintf(stderr, "usage: %s <source_vpl_file> </path/to/output/mod/> [0|1 - prepended verse refs] [0|1 - NT only]\n\n", argv[0]);
+ fprintf(stderr, "\tWith no verse refs, source file must contain exactly 31102 lines.\n");
+ fprintf(stderr, "\tThis is KJV verse count plus headings for MODULE,\n");
+ fprintf(stderr, "\tTESTAMENT, BOOK, CHAPTER. An example snippet follows:\n\n");
+ fprintf(stderr, "\t\tMODULE HEADER\n");
+ fprintf(stderr, "\t\tOLD TESTAMENT HEADER\n");
+ fprintf(stderr, "\t\tGENESIS HEADER\n");
+ fprintf(stderr, "\t\tCHAPTER 1 HEADER\n");
+ fprintf(stderr, "\t\tIn the beginning...\n\n");
+ fprintf(stderr, "\t... implying there must also be a CHAPTER2 HEADER,\n");
+ fprintf(stderr, "\tEXODUS HEADER, NEW TESTAMENT HEADER, etc. If there is no text for\n");
+ fprintf(stderr, "\tthe header, a blank line must, at least, hold place.\n\n");
+ fprintf(stderr, "\tWith verse refs, source file must simply contain any number of lines,\n");
+ fprintf(stderr, "\tthat begin with the verse reference for which it is an entry. e.g.:\n\n");
+ fprintf(stderr, "\t\tgen 1:0 CHAPTER 1 HEADER\n");
+ fprintf(stderr, "\t\tgen 1:1 In the beginning...\n\n");
+ exit(-1);
+ }
+
+ // Let's see if we can open our input file
+ int fd = open(argv[1], O_RDONLY|O_BINARY); // NOTE(review): no close(fd) appears anywhere in main
+ if (fd < 0) {
+ fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[1]);
+ exit(-2);
+ }
+
+ // Try to initialize a default set of datafiles and indicies at our
+ // datapath location passed to us from the user.
+ if (RawText::createModule(argv[2])) { // a nonzero return is treated as failure
+ fprintf(stderr, "error: %s: couldn't create module at path: %s \n", argv[0], argv[2]);
+ exit(-3);
+ }
+
+ // not used yet, but for future support of a vpl file with each line
+ // prepended with verse reference, eg. "Gen 1:1 In the beginning..."
+ bool vref = false; // NOTE(review): the comment above says "not used yet", but vref selects the branch taken in the loop below
+ if (argc > 3)
+ vref = (argv[3][0] == '0') ? false : true;
+
+ // if 'nt' is the 4th arg, our vpl file only has the NT
+ bool ntonly = false;
+ if (argc > 4)
+ ntonly = (argv[4][0] == '0') ? false : true; // any first character other than '0' enables NT-only
+
+ // Do some initialization stuff
+ char *buffer = 0; // line buffer; readline() frees and reallocates it on every call
+ RawText mod(argv[2]); // open our datapath with our RawText driver.
+ VerseKey vk;
+ vk.AutoNormalize(0);
+ vk.Headings(1); // turn on mod/testmnt/book/chap headings
+ vk.Persist(1);
+
+ mod.setKey(vk); // with Persist(1) set above, mod presumably follows vk itself rather than a copy — TODO confirm
+
+ // Loop through module from TOP to BOTTOM and set next line from
+ // input file as text for this entry in the module
+ mod = TOP;
+ if (ntonly) vk = "Matthew 1:1"; // begin writing at Matthew 1:1 instead of the top
+
+ int successive = 0; //part of hack below
+ while ((!mod.Error()) && (!readline(fd, &buffer))) { // readline() returns nonzero once read() reports end of file
+ if (*buffer == '|') // comments, ignore line
+ continue;
+ if (vref) {
+ const char *verseText = parseVReg(buffer); // NUL-terminates the reference inside buffer; verseText points at the text after it
+ if (!verseText) { // if we didn't find a valid verse ref
+ std::cerr << "No valid verse ref found on line: " << buffer << "\n";
+ exit(-4);
+ }
+
+ vk = buffer; // buffer now holds only the reference part of the line
+ if (vk.Error()) {
+ std::cerr << "Error parsing key: " << buffer << "\n";
+ exit(-5);
+ }
+ string orig = mod.getRawEntry(); // text already stored at this position, if any
+
+ if (!isKJVRef(buffer)) {
+ VerseKey origVK = vk;
+ /* This block is functioning improperly -- problem with AutoNormalize???
+ do {
+ vk--;
+ }
+ while (!vk.Error() && !isKJVRef(vk)); */
+ //hack to replace above:
+ successive++; // number of consecutive non-KJV lines since the last accepted reference
+ vk -= successive; // step back that many positions; presumably lands on the last accepted verse — TODO confirm
+ orig = mod.getRawEntry(); // re-read the entry at the adjusted position
+
+ std::cerr << "Not a valid KJV ref: " << origVK << "\n";
+ std::cerr << "appending to ref: " << vk << "\n";
+ orig += " [ (";
+ orig += origVK;
+ orig += ") ";
+ orig += verseText;
+ orig += " ] ";
+ verseText = orig.c_str(); // orig is not modified again before the write below, so this pointer stays valid
+ }
+ else {
+ successive = 0;
+ }
+
+ if (orig.length() > 1) // NOTE(review): on the non-KJV path orig already contains the appended text, so this warning always fires there
+ std::cerr << "Warning, overwriting verse: " << vk << std::endl;
+
+ // ------------- End verse tests -----------------
+ mod << verseText; // save text to module at current position
+ }
+ else {
+ fixText(buffer); // rewrites buffer in place before it is stored
+ mod << buffer; // save text to module at current position
+ mod++; // increment module position
+ }
+ }
+
+ // clear up our buffer that readline might have allocated
+ if (buffer)
+ delete [] buffer;
+ }
Index: vs2osisref.cpp
===================================================================
RCS file: /usr/local/cvsroot/sword/utilities/vs2osisref.cpp,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** vs2osisref.cpp 9 May 2003 18:28:00 -0000 1.3
--- vs2osisref.cpp 26 May 2003 08:36:31 -0000 1.4
***************
*** 43,60 ****
char buf[5120];
char frag[800];
if (element) {
memmove(frag, startFrag, ((const char *)element->userData - startFrag) + 1);
frag[((const char *)element->userData - startFrag) + 1] = 0;
! startFrag = (const char *)element->userData + 1;
! sprintf(buf, "<reference osisRef=\"%s-%s\">%s</reference>", element->LowerBound().getOSISRef(), element->UpperBound().getOSISRef(), frag);
}
else {
memmove(frag, startFrag, ((const char *)verses.GetElement(i)->userData - startFrag) + 1);
frag[((const char *)verses.GetElement(i)->userData - startFrag) + 1] = 0;
! startFrag = (const char *)verses.GetElement(i)->userData + 1;
! sprintf(buf, "<reference osisRef=\"%s\">%s</reference>", VerseKey(*verses.GetElement(i)).getOSISRef(), frag);
}
outRef+=buf;
}
return outRef.c_str();
}
--- 43,79 ----
char buf[5120];
char frag[800];
+ char preJunk[800];
+ char postJunk[800];
+ *preJunk = 0;
+ *postJunk = 0;
+ while ((*startFrag) && (strchr(" ;,()[].", *startFrag))) {
+ outRef += *startFrag;
+ startFrag++;
+ }
if (element) {
memmove(frag, startFrag, ((const char *)element->userData - startFrag) + 1);
frag[((const char *)element->userData - startFrag) + 1] = 0;
! int j;
! for (j = strlen(frag)-1; j && (strchr(" ;,()[].", frag[j])); j--);
! if (frag[j+1])
! strcpy(postJunk, frag+j+1);
! frag[j+1]=0;
! startFrag += ((const char *)element->userData - startFrag) + 1;
! sprintf(buf, "<reference osisRef=\"%s-%s\">%s</reference>%s", element->LowerBound().getOSISRef(), element->UpperBound().getOSISRef(), frag, postJunk);
}
else {
memmove(frag, startFrag, ((const char *)verses.GetElement(i)->userData - startFrag) + 1);
frag[((const char *)verses.GetElement(i)->userData - startFrag) + 1] = 0;
! int j;
! for (j = strlen(frag)-1; j && (strchr(" ;,()[].", frag[j])); j--);
! if (frag[j+1])
! strcpy(postJunk, frag+j+1);
! frag[j+1]=0;
! startFrag += ((const char *)verses.GetElement(i)->userData - startFrag) + 1;
! sprintf(buf, "<reference osisRef=\"%s\">%s</reference>%s", VerseKey(*verses.GetElement(i)).getOSISRef(), frag, postJunk);
}
outRef+=buf;
}
+ outRef+=startFrag;
return outRef.c_str();
}
***************
*** 82,86 ****
}
! std::cout << convertToOSIS(verseString, &verseKey);
return 0;
--- 101,105 ----
}
! std::cout << convertToOSIS(verseString, &verseKey) << "\n";
return 0;