[sword-svn] r2399 - trunk/utilities
dmsmith at crosswire.org
dmsmith at crosswire.org
Sun May 10 18:21:41 MST 2009
Author: dmsmith
Date: 2009-05-10 18:21:41 -0700 (Sun, 10 May 2009)
New Revision: 2399
Modified:
trunk/utilities/osis2mod.cpp
Log:
Osis2mod improvements:
1) changed -4 to -s <2|4>, matching tei2mod. -4 was added recently, so the change had not been released.
2) API-113 resolved by adding the ability to read from standard input.
Modified: trunk/utilities/osis2mod.cpp
===================================================================
--- trunk/utilities/osis2mod.cpp 2009-05-09 23:03:59 UTC (rev 2398)
+++ trunk/utilities/osis2mod.cpp 2009-05-11 01:21:41 UTC (rev 2399)
@@ -80,6 +80,7 @@
SWText *module = 0;
VerseKey currentVerse;
+SWBuf v11n = "KJV";
char activeOsisID[255];
char currentOsisID[255];
@@ -94,8 +95,8 @@
bool isOSISAbbrev(const char *buf) {
VerseMgr *vmgr = VerseMgr::getSystemVerseMgr();
- const VerseMgr::System *v11n = vmgr->getVersificationSystem(currentVerse.getVersificationSystem());
- return v11n->getBookNumberByOSISName(buf) >= 0;
+ const VerseMgr::System *av11n = vmgr->getVersificationSystem(currentVerse.getVersificationSystem());
+ return av11n->getBookNumberByOSISName(buf) >= 0;
}
/**
@@ -1258,6 +1259,8 @@
if (error) fprintf(stderr, "\n%s: %s\n", app, error);
fprintf(stderr, "\nusage: %s <output/path> <osisDoc> [OPTIONS]\n", app);
+ fprintf(stderr, " <output/path>\t\t an existing folder that the module will be written\n");
+ fprintf(stderr, " <osisDoc>\t\t path to the validated OSIS document, or '-' to read from standard input\n");
fprintf(stderr, " -a\t\t\t augment module if exists (default is to create new)\n");
fprintf(stderr, " -z\t\t\t use ZIP compression (default no compression)\n");
fprintf(stderr, " -Z\t\t\t use LZSS compression (default no compression)\n");
@@ -1269,7 +1272,7 @@
fprintf(stderr, "\t\t\t\t (default is to convert to UTF-8, if needed,\n");
fprintf(stderr, "\t\t\t\t and then normalize to NFC)\n");
fprintf(stderr, "\t\t\t\t Note: UTF-8 texts should be normalized to NFC.\n");
- fprintf(stderr, " -4\t\t\t use 4 byte size entries (default is 2).\n");
+ fprintf(stderr, " -s <2|4>\t\t max text size per entry (default is 2).\n");
fprintf(stderr, "\t\t\t\t Note: useful for commentaries with very large entries\n");
fprintf(stderr, "\t\t\t\t in uncompressed modules (default is 65535 bytes)\n");
fprintf(stderr, " -v <v11n>\t\t specify a versification scheme to use (default is KJV)\n");
@@ -1302,6 +1305,93 @@
exit(EXIT_BAD_ARG);
}
+/**
+ * Reads an OSIS document from a stream and writes it into the global module.
+ *
+ * Resets activeOsisID and currentOsisID, configures currentVerse with the
+ * global v11n setting, and positions the module at TOP. The stream is then
+ * scanned one character at a time:
+ *  - everything from '<' up to the next '>' is collected as a token; a token
+ *    whose second or third character is a letter is passed through
+ *    transformBSP() and handleToken(), and is appended to the pending text
+ *    when handleToken() returns false. Other tokens are discarded.
+ *  - outside of tokens, each run of whitespace is collapsed to one blank.
+ * After the last character, writeEntry() is called with an empty buffer and
+ * the force flag, followed by writeLinks().
+ *
+ * @param infile stream to read; consumed until get() reports end of input
+ */
+void processOSIS(istream& infile) {
+	activeOsisID[0] = '\0';
+
+	strcpy(currentOsisID,"N/A");
+
+	currentVerse.setVersificationSystem(v11n);
+	currentVerse.AutoNormalize(0);
+	currentVerse.Headings(1); // turn on mod/testmnt/book/chap headings
+	currentVerse.Persist(1);
+
+	module->setKey(currentVerse);
+	module->setPosition(TOP);
+
+	SWBuf token;
+	SWBuf text;
+	bool intoken = false;
+	bool inWhitespace = false;
+
+	// Keep the result of get() as an int_type so that end of input can be
+	// told apart from a legitimate 0xFF byte. Comparing a plain char with -1
+	// drops that byte where char is signed and never matches where char is
+	// unsigned.
+	const istream::int_type endOfInput = istream::traits_type::eof();
+	for (istream::int_type next = infile.get(); next != endOfInput; next = infile.get()) {
+
+		char curChar = istream::traits_type::to_char_type(next);
+
+		if (!intoken && curChar == '<') {
+			intoken = true;
+			token = "<";
+			continue;
+		}
+
+		// Outside of tokens merge adjacent whitespace
+		if (!intoken) {
+			// <cctype> functions require a value representable as unsigned char
+			const bool seeingSpace = isspace(static_cast<unsigned char>(curChar)) != 0;
+			if (seeingSpace) {
+				if (inWhitespace) {
+					continue;
+				}
+				// convert all whitespace to blanks
+				curChar = ' ';
+			}
+			inWhitespace = seeingSpace;
+		}
+
+		if (intoken && curChar == '>') {
+			intoken = false;
+			inWhitespace = false;
+			token.append('>');
+			// take this isalpha if out to check for bugs in text
+			if (isalpha(static_cast<unsigned char>(token[1])) || isalpha(static_cast<unsigned char>(token[2]))) {
+				//cout << "Handle:" << token.c_str() << endl;
+				XMLTag t = transformBSP(token.c_str());
+
+				if (!handleToken(text, t)) {
+					text.append(t);
+				}
+			}
+			continue;
+		}
+
+		if (intoken) {
+			token.append(curChar);
+		}
+		else {
+			// NOTE(review): as shown here both special cases append the
+			// character unchanged; the archive may have decoded entity
+			// literals in these strings - confirm against the repository.
+			switch (curChar) {
+				case '>' : text.append(">"); break;
+				case '<' : text.append("<"); break;
+				default : text.append(curChar); break;
+			}
+		}
+	}
+
+	// Force the last entry from the text buffer.
+	text = "";
+	writeEntry(text, true);
+	writeLinks();
+
+#ifdef _ICU_
+	if (converted) fprintf(stderr, "osis2mod converted %d verses to UTF-8\n", converted);
+	if (normalized) fprintf(stderr, "osis2mod normalized %d verses to NFC\n", normalized);
+#endif
+}
+
+
int main(int argc, char **argv) {
fprintf(stderr, "You are running osis2mod: $Rev$\n");
@@ -1312,16 +1402,15 @@
}
// variables for arguments, holding defaults
- const char* program = argv[0];
- const char* path = argv[1];
- const char* osisDoc = argv[2];
- int append = 0;
- int compType = 0;
- int iType = 4;
- int largeEntry = 0;
- SWBuf cipherKey = "";
- SWBuf v11n = "KJV";
-
+ const char* program = argv[0];
+ const char* path = argv[1];
+ const char* osisDoc = argv[2];
+ int append = 0;
+ SWBuf compType = "";
+ bool isCommentary = false;
+ int iType = 4;
+ int entrySize = 0;
+ SWBuf cipherKey = "";
SWCompress *compressor = 0;
for (int i = 3; i < argc; i++) {
@@ -1329,14 +1418,14 @@
append = 1;
}
else if (!strcmp(argv[i], "-z")) {
- if (compType) usage(*argv, "Cannot specify both -z and -Z");
- if (largeEntry) usage(*argv, "Cannot specify both -z and -4");
- compType = 2;
+ if (compType.size()) usage(*argv, "Cannot specify both -z and -Z");
+ if (entrySize) usage(*argv, "Cannot specify both -z and -s");
+ compType = "ZIP";
}
else if (!strcmp(argv[i], "-Z")) {
- if (compType) usage(*argv, "Cannot specify both -z and -Z");
- if (largeEntry) usage(*argv, "Cannot specify both -Z and -4");
- compType = 1;
+ if (compType.size()) usage(*argv, "Cannot specify both -z and -Z");
+ if (entrySize) usage(*argv, "Cannot specify both -Z and -s");
+ compType = "LZSS";
}
else if (!strcmp(argv[i], "-b")) {
if (i+1 < argc) {
@@ -1356,10 +1445,19 @@
if (i+1 < argc) v11n = argv[++i];
else usage(*argv, "-v requires <v11n>");
}
- else if (!strcmp(argv[i], "-4")) {
- if (compType) usage(*argv, "Cannot specify -4 and -z or -Z");
- largeEntry = 1;
+ else if (!strcmp(argv[i], "-s")) {
+ if (compType.size()) usage(*argv, "Cannot specify -s and -z or -Z");
+ if (i+1 < argc) {
+ entrySize = atoi(argv[++i]);
+ if (entrySize == 2 || entrySize == 4) {
+ continue;
+ }
+ }
+ usage(*argv, "-s requires one of <2|4>");
}
+ else if (!strcmp(argv[i], "-C")) {
+ isCommentary = true;
+ }
#ifdef DEBUG
else if (!strcmp(argv[i], "-d")) {
if (i+1 < argc) debug |= atoi(argv[++i]);
@@ -1369,11 +1467,12 @@
else usage(*argv, (((SWBuf)"Unknown argument: ")+ argv[i]).c_str());
}
- switch (compType) { // these are deleted by zText
- case 0: break;
- case 1: compressor = new LZSSCompress(); break;
- case 2: compressor = new ZipCompress(); break;
- }
+	if (compType == "ZIP") {
+		compressor = new ZipCompress();
+	}
+	else if (compType == "LZSS") {	// compare, don't assign: '=' here would overwrite compType with "LZSS"
+		compressor = new LZSSCompress();
+	}
#ifndef _ICU_
if (normalize) {
@@ -1388,7 +1487,6 @@
}
#endif
-
if (!append) { // == 0 then create module
// Try to initialize a default set of datafiles and indicies at our
// datapath location passed to us from the user.
@@ -1398,7 +1496,7 @@
exit(EXIT_NO_CREATE);
}
}
- else if (largeEntry) {
+ else if (entrySize == 4) {
if (RawText4::createModule(path, v11n)) {
fprintf(stderr, "ERROR: %s: couldn't create module at path: %s \n", program, path);
exit(EXIT_NO_CREATE);
@@ -1412,13 +1510,6 @@
}
}
- // Let's see if we can open our input file
- ifstream infile(osisDoc);
- if (infile.fail()) {
- fprintf(stderr, "ERROR: %s: couldn't open input file: %s \n", program, osisDoc);
- exit(EXIT_NO_READ);
- }
-
// Do some initialization stuff
if (compressor) {
// Create a compressed text module allowing very large entries
@@ -1437,7 +1528,7 @@
v11n // versification
);
}
- else if (largeEntry) {
+ else if (entrySize == 4) {
// Create a raw text module allowing very large entries
// Taking defaults except for first and last argument
module = new RawText4(
@@ -1481,95 +1572,26 @@
exit(EXIT_NO_WRITE);
}
- activeOsisID[0] = '\0';
-
- strcpy(currentOsisID,"N/A");
-
- currentVerse.setVersificationSystem(v11n);
- currentVerse.AutoNormalize(0);
- currentVerse.Headings(1); // turn on mod/testmnt/book/chap headings
- currentVerse.Persist(1);
-
- module->setKey(currentVerse);
- module->setPosition(TOP);
-
- SWBuf token;
- SWBuf text;
- bool intoken = false;
- bool inWhitespace = false;
- bool seeingSpace = false;
- char curChar = '\0';
-
- while (infile.good()) {
-
- curChar = infile.get();
-
- // skip the character if it is bad. infile.good() will catch the problem
- if (curChar == -1) {
- continue;
+ // Either read from std::cin (aka stdin), when the argument is a '-'
+ // or from a specified file.
+ if (!strcmp(osisDoc, "-")) {
+ processOSIS(cin);
+ }
+ else {
+ // Let's see if we can open our input file
+ ifstream infile(osisDoc);
+ if (infile.fail()) {
+ fprintf(stderr, "ERROR: %s: couldn't open input file: %s \n", program, osisDoc);
+ exit(EXIT_NO_READ);
}
-
- if (!intoken && curChar == '<') {
- intoken = true;
- token = "<";
- continue;
- }
-
- // Outside of tokens merge adjacent whitespace
- if (!intoken) {
- seeingSpace = isspace(curChar);
- if (seeingSpace) {
- if (inWhitespace) {
- continue;
- }
- // convert all whitespace to blanks
- curChar = ' ';
- }
- inWhitespace = seeingSpace;
- }
-
- if (intoken && curChar == '>') {
- intoken = false;
- inWhitespace = false;
- token.append('>');
- // take this isalpha if out to check for bugs in text
- if ((isalpha(token[1])) || (isalpha(token[2]))) {
- //cout << "Handle:" << token.c_str() << endl;
- XMLTag t = transformBSP(token.c_str());
-
- if (!handleToken(text, t)) {
- text.append(t);
- }
- }
- continue;
- }
-
- if (intoken) {
- token.append(curChar);
- }
- else {
- switch (curChar) {
- case '>' : text.append(">"); break;
- case '<' : text.append("<"); break;
- default : text.append(curChar); break;
- }
- }
+ processOSIS(infile);
+ infile.close();
}
- // Force the last entry from the text buffer.
- text = "";
- writeEntry(text, true);
- writeLinks();
-
delete module;
if (cipherFilter)
delete cipherFilter;
- infile.close();
-#ifdef _ICU_
- if (converted) fprintf(stderr, "osis2mod converted %d verses to UTF-8\n", converted);
- if (normalized) fprintf(stderr, "osis2mod normalized %d verses to NFC\n", normalized);
-#endif
exit(0); // success
}
More information about the sword-cvs
mailing list