24 #pragma warning( disable: 4251 )
70 #ifndef NO_SWORD_NAMESPACE
71 using namespace sword;
153 unsigned int countUTF8 = 0;
157 const unsigned char *p = (
const unsigned char*) txt;
164 unsigned char i = *p;
165 for (count = 0; i & 0x80; count++) {
175 if (count < 2 || count > 4)
return 0;
179 while (--count && *++p) {
182 if ((0xc0 & *p) != 0x80)
return 0;
197 return countUTF8 ? 1 : -1;
207 cout <<
"WARNING(UTF8): " << osisID <<
": Should be converted to UTF-8 (" << text <<
")" << endl;
215 cout <<
"INFO(UTF8): " << osisID <<
": Converting to UTF-8 (" << text <<
")" << endl;
216 converter.processText(text, (SWKey *)2);
227 cout <<
"ERROR(UTF8): " << osisID <<
": Converting to UTF-8 (" << text <<
")" << endl;
232 normalizer.processText(text, (SWKey *)2);
233 if (before != text) {
255 char* s = buf.getRawData();
257 bool inRange =
false;
261 cout <<
"DEBUG(REF): Copy range marker:" << *p << endl;;
271 while (*n && *n !=
':' && *n !=
' ' && *n !=
'-') {
281 cout <<
"DEBUG(REF): Found a work prefix ";
282 for (
char *x = s; x <= n; x++) {
292 cout <<
"DEBUG(REF): Copy osisID:";
295 while (*p && *p !=
'!' && *p !=
' ' && *p !=
'-') {
296 if (
debug & DEBUG_REF) {
303 if (
debug & DEBUG_REF) {
311 while (*n && *n !=
' ' && *n !=
'-') {
315 if (
debug & DEBUG_REF) {
316 cout <<
"DEBUG(REF): Found a grain suffix ";
317 for (
char *x = p; x < n; x++) {
330 inRange = !inRange && *p ==
'-';
332 if (
debug & DEBUG_REF) {
334 cout <<
"DEBUG(REF): Found a range" << endl;
339 if (!inRange && *p ==
' ') {
348 if (
debug & DEBUG_REF) {
349 cout <<
"DEBUG(REF): replacing space with ;. Remaining: " << p << endl;
360 buf.setSize(s - buf.c_str());
363 cout <<
"DEBUG(REF): shortended keyVal to`" << buf.c_str() <<
"`"<< endl;
377 before.setVersificationSystem(
currentVerse.getVersificationSystem());
378 before.setAutoNormalize(
false);
379 before.setIntros(
true);
384 if (!before.getTestament() || !before.getBook() || !before.getChapter() || !before.getVerse()) {
391 after.setVersificationSystem(
currentVerse.getVersificationSystem());
392 after.setAutoNormalize(
true);
403 cout <<
"DEBUG(V11N)[" << caller <<
"]: " << before <<
" normalizes to " << after << endl;
435 saveKey.setVersificationSystem(key.getVersificationSystem());
436 saveKey.setAutoNormalize(
false);
437 saveKey.setIntros(
true);
442 int chapterMax = key.getChapterMax();
443 if (key.getChapter() > chapterMax) {
444 key.setChapter(chapterMax);
450 int verseMax = key.getVerseMax();
451 key.setVerse(verseMax);
454 cout <<
"DEBUG(V11N) Chapter max:" << chapterMax <<
", Verse Max:" << verseMax << endl;
478 cout <<
"INFO(V11N): " << saveKey.getOSISRef()
479 <<
" is not in the " << key.getVersificationSystem()
480 <<
" versification. Appending content to " << key.getOSISRef() << endl;
486 static SWBuf revision; revision.setFormatted(
"<milestone type=\"x-importer\" subType=\"x-osis2mod\" n=\"$Rev: 3769 $ (SWORD: %s)\"/>",
SWVersion::currentVersion.getText());
487 static bool firstOT =
true;
488 static bool firstNT =
true;
498 strcpy(keyOsisID,
"-force");
501 static VerseKey lastKey;
502 lastKey.setVersificationSystem(
currentVerse.getVersificationSystem());
503 lastKey.setAutoNormalize(0);
504 lastKey.setIntros(1);
507 saveKey.setVersificationSystem(
currentVerse.getVersificationSystem());
508 saveKey.setAutoNormalize(0);
509 saveKey.setIntros(1);
525 if ((testmt == 1 && firstOT) || (testmt == 2 && firstNT)) {
527 t.setVersificationSystem(
currentVerse.getVersificationSystem());
528 t.setAutoNormalize(0);
603 saveKey.setVersificationSystem(
currentVerse.getVersificationSystem());
604 saveKey.setAutoNormalize(0);
605 saveKey.setIntros(1);
609 cout <<
"INFO(LINK): Linking " <<
currentVerse.getOSISRef() <<
" to " << dest.getOSISRef() <<
"\n";
620 static bool inBookIntro =
false;
623 static bool inChapterIntro =
false;
626 static bool inChapter =
false;
629 static bool inVerse =
false;
632 static bool inPreVerse =
false;
633 static int genID = 1;
636 static bool inWOC =
false;
638 static XMLTag wocTag =
"<q who=\"Jesus\" marker=\"\">";
641 static bool firstDiv =
false;
642 static bool headerEnded =
false;
646 static SWBuf sidBook =
"";
647 static SWBuf sidChapter =
"";
648 static SWBuf sidVerse =
"";
651 static std::stack<XMLTag> quoteStack;
658 static std::stack<XMLTag> tagStack;
661 static int chapterDepth = 0;
662 static int bookDepth = 0;
663 static int verseDepth = 0;
665 int tagDepth = tagStack.size();
666 SWBuf tokenName = token.
getName();
676 tagStack.push(token);
679 cout <<
"DEBUG(STACK): " <<
currentOsisID <<
": push (" << tagStack.size() <<
") " << token.
getName() << endl;
685 if (headerEnded && (tokenName ==
"div")) {
687 cout <<
"DEBUG(FOUND): Found first div and pitching prior material: " << text << endl;
705 if (tokenName ==
"div" && typeAttr ==
"book") {
706 if (inBookIntro || inChapterIntro) {
709 cout <<
"DEBUG(TITLE): " <<
currentOsisID <<
": OOPS INTRO " << endl;
710 cout <<
"\tinChapterIntro = " << inChapterIntro << endl;
711 cout <<
"\tinBookIntro = " << inBookIntro << endl;
730 inChapterIntro =
false;
733 cout <<
"DEBUG(TITLE): " <<
currentOsisID <<
": Looking for book introduction" << endl;
736 bookDepth = tagStack.size();
742 cout <<
"WARNING(V11N): New book is " << token.
getAttribute(
"osisID") <<
" and is not in " <<
v11n <<
" versification, ignoring" << endl;
745 cout <<
"DEBUG(FOUND): New book is " <<
currentVerse.getOSISRef() << endl;
752 if ((tokenName ==
"chapter") ||
753 (tokenName ==
"div" && typeAttr ==
"chapter")
757 cout <<
"DEBUG(TITLE): " <<
currentOsisID <<
": BOOK INTRO "<< text << endl;
767 cout <<
"DEBUG(FOUND): Current chapter is " <<
currentVerse.getOSISRef() <<
" (" << token.
getAttribute(
"osisID") <<
")" << endl;
777 inChapterIntro =
true;
780 cout <<
"DEBUG(TITLE): " <<
currentOsisID <<
": Looking for chapter introduction" << endl;
783 chapterDepth = tagStack.size();
790 if ((tokenName ==
"verse") ||
791 (tokenName ==
"div" && token.
getAttribute(
"annotateType"))
794 cout <<
"DEBUG(FOUND): Entering verse" << endl;
797 if (inChapterIntro) {
799 cout <<
"DEBUG(TITLE): " <<
currentOsisID <<
": Done looking for chapter introduction" << endl;
803 if (
debug & DEBUG_TITLE) {
804 cout <<
"DEBUG(TITLE): " <<
currentOsisID <<
": CHAPTER INTRO "<< text << endl;
814 sprintf(genBuf,
"<div type=\"x-milestone\" subType=\"x-preverse\" eID=\"pv%d\"/>", genID++);
819 SWBuf keyVal = token.
getAttribute(tokenName ==
"verse" ?
"osisID" :
"annotateRef");
829 int memberKeyCount = verseKeys.getCount();
830 if (memberKeyCount) {
831 verseKeys.setPosition(
TOP);
837 verseKeys.increment(1);
838 if (!verseKeys.popError()) {
841 cout <<
"DEBUG(LINK MASTER): " <<
currentVerse.getOSISRef() << endl;
846 cout <<
"ERROR(REF): Invalid osisID/annotateRef: " << token.
getAttribute((tokenName ==
"verse") ?
"osisID" :
"annotateRef") << endl;
851 if (
debug & DEBUG_OTHER) {
852 cout <<
"DEBUG(FOUND): New current verse is " <<
currentVerse.getOSISRef() << endl;
853 cout <<
"DEBUG(FOUND): osisID/annotateRef is adjusted to: " << keyVal << endl;
860 inChapterIntro =
false;
861 verseDepth = tagStack.size();
864 if (tokenName !=
"verse") {
870 XMLTag t =
"<milestone resp=\"v\" />";
873 for (StringList::iterator loop = attrNames.begin(); loop != attrNames.end(); loop++) {
874 const char* attr = (*loop).c_str();
925 if (tokenName ==
"q") {
926 quoteStack.push(token);
929 cout <<
"DEBUG(QUOTE): " <<
currentOsisID <<
": quote top(" << quoteStack.size() <<
") " << token << endl;
963 if (!inPreVerse && !inBookIntro) {
964 if (inChapterIntro) {
969 if ((tokenName ==
"div" && typeAttr ==
"section") ||
970 (tokenName ==
"title" && typeAttr.length() != 0 && typeAttr !=
"main" && typeAttr !=
"chapter" && typeAttr !=
"sub")
973 cout <<
"DEBUG(TITLE): " <<
currentOsisID <<
": Done looking for chapter introduction" << endl;
977 if (
debug & DEBUG_TITLE) {
978 cout <<
"DEBUG(TITLE): " <<
currentOsisID <<
": CHAPTER INTRO "<< text << endl;
985 inChapterIntro =
false;
990 else if (!inVerse && inChapter) {
996 sprintf(genBuf,
"<div type=\"x-milestone\" subType=\"x-preverse\" sID=\"pv%d\"/>", genID);
1002 if (!inVerse && !inBookIntro && !inChapterIntro) {
1003 cout <<
"DEBUG(INTERVERSE): " <<
currentOsisID <<
": interverse start token " << token <<
":" << text.c_str() << endl;
1013 if (tagStack.empty()) {
1014 cout <<
"FATAL(NESTING): " <<
currentOsisID <<
": tag expected" << endl;
1020 XMLTag topToken = tagStack.top();
1021 tagDepth = tagStack.size();
1024 cout <<
"DEBUG(STACK): " <<
currentOsisID <<
": pop(" << tagDepth <<
") " << topToken.
getName() << endl;
1029 if (tokenName != topToken.
getName()) {
1030 cout <<
"FATAL(NESTING): " <<
currentOsisID <<
": Expected " << topToken.
getName() <<
" found " << tokenName << endl;
1039 if (tokenName ==
"header") {
1043 cout <<
"DEBUG(FOUND): End of header found" << endl;
1052 if ((tokenName ==
"verse") ||
1053 (tokenName ==
"div" && eidAttr == sidVerse)
1056 if (tagDepth != verseDepth) {
1057 cout <<
"WARNING(NESTING): verse " <<
currentOsisID <<
" is not well formed:(" << verseDepth <<
"," << tagDepth <<
")" << endl;
1062 text.append(
"</q>");
1067 if (tokenName !=
"verse") {
1073 XMLTag t =
"<milestone resp=\"v\" />";
1076 for (StringList::iterator loop = attrNames.begin(); loop != attrNames.end(); loop++) {
1077 const char* attr = (*loop).c_str();
1095 if (tokenName ==
"q") {
1096 XMLTag topToken = quoteStack.top();
1099 cout <<
"DEBUG(QUOTE): " <<
currentOsisID <<
": quote pop(" << quoteStack.size() <<
") " << topToken <<
" -- " << token << endl;
1108 if (
debug & DEBUG_QUOTE) {
1109 cout <<
"DEBUG(QUOTE): " <<
currentOsisID <<
": (" << quoteStack.size() <<
") " << topToken <<
" -- " << token << endl;
1121 if (strcmp(sID, eID)) {
1122 cout <<
"ERROR(NESTING): improper nesting " <<
currentOsisID <<
": matching (sID,eID) not found. Looking at (" << sID <<
"," << eID <<
")" << endl;
1137 text.append(
"</q>");
1145 if (!inVerse && !inBookIntro && !inChapterIntro) {
1147 if ((tokenName ==
"chapter") ||
1148 (tokenName ==
"div" && eidAttr == sidChapter)
1160 if (tokenName ==
"div" && eidAttr == sidBook) {
1170 if (tokenName ==
"osisText" || tokenName ==
"osis") {
1184 cout <<
"DEBUG(INTERVERSE): " <<
currentOsisID <<
": appending interverse end tag: " << tokenName <<
"(" << tagDepth <<
"," << chapterDepth <<
"," << bookDepth <<
")" << endl;
1191 cout <<
"DEBUG(INTERVERSE): " <<
currentOsisID <<
": interverse end tag: " << tokenName <<
"(" << tagDepth <<
"," << chapterDepth <<
"," << bookDepth <<
")" << endl;
1216 static std::stack<XMLTag> bspTagStack;
1225 cout <<
"DEBUG(XFORM): " <<
currentOsisID <<
": xform empty " << t << endl;
1234 if (tagName ==
"p") {
1235 t.
setText(
"<div type=\"x-p\" />");
1236 sprintf(buf,
"gen%d", sID++);
1248 else if (tagName ==
"chapter" ||
1249 tagName ==
"closer" ||
1250 (tagName ==
"div" && typeAttr !=
"colophon") ||
1254 tagName ==
"salute" ||
1255 tagName ==
"signed" ||
1256 tagName ==
"speech" ||
1260 sprintf(buf,
"gen%d", sID++);
1263 bspTagStack.push(t);
1266 cout <<
"DEBUG(XFORM): " <<
currentOsisID <<
": xform push (" << bspTagStack.size() <<
") " << t <<
" (tagname=" << tagName <<
")" << endl;
1267 XMLTag topToken = bspTagStack.top();
1268 cout <<
"DEBUG(XFORM): " <<
currentOsisID <<
": xform top(" << bspTagStack.size() <<
") " << topToken << endl;
1272 if (!bspTagStack.empty()) {
1273 XMLTag topToken = bspTagStack.top();
1276 cout <<
"DEBUG(XFORM): " <<
currentOsisID <<
": xform pop(" << bspTagStack.size() <<
") " << topToken << endl;
1284 if (tagName ==
"chapter" ||
1285 tagName ==
"closer" ||
1286 (tagName ==
"div" && topTypeAttr !=
"colophon") ||
1291 tagName ==
"salute" ||
1292 tagName ==
"signed" ||
1293 tagName ==
"speech" ||
1304 cout <<
"FATAL(TAGSTACK): " <<
currentOsisID <<
": closing tag without opening tag" << endl;
1327 destKey.setVersificationSystem(
currentVerse.getVersificationSystem());
1328 destKey.setAutoNormalize(0);
1329 destKey.setIntros(1);
1332 linkKey.setVersificationSystem(
currentVerse.getVersificationSystem());
1333 linkKey.setAutoNormalize(0);
1334 linkKey.setIntros(1);
1335 for (
unsigned int i = 0; i <
linkedVerses.size(); i++) {
1340 verseKeys.setPosition(
TOP);
1341 destKey = verseKeys.getElement();
1342 verseKeys.increment(1);
1344 while (!verseKeys.popError()) {
1345 linkKey = verseKeys.getElement();
1347 verseKeys.increment(1);
1352 void usage(
const char *app,
const char *error = 0,
const bool verboseHelp =
false) {
1354 if (error) fprintf(stderr,
"\n%s: %s\n", app, error);
1356 fprintf(stderr,
"OSIS Bible/commentary module creation tool for The SWORD Project\n");
1357 fprintf(stderr,
"\nusage: %s <output/path> <osisDoc> [OPTIONS]\n", app);
1358 fprintf(stderr,
" <output/path>\t\t an existing folder that the module will be written\n");
1359 fprintf(stderr,
" <osisDoc>\t\t path to the validated OSIS document, or '-' to\n");
1360 fprintf(stderr,
"\t\t\t\t read from standard input\n");
1361 fprintf(stderr,
" -a\t\t\t augment module if exists (default is to create new)\n");
1362 fprintf(stderr,
" -z <l|z|b|x>\t\t compression type (default: none)\n");
1363 fprintf(stderr,
"\t\t\t\t l - LZSS; z - ZIP; b - bzip2; x - xz\n");
1364 fprintf(stderr,
" -b <2|3|4>\t\t compression block size (default: 4)\n");
1365 fprintf(stderr,
"\t\t\t\t 2 - verse; 3 - chapter; 4 - book\n");
1366 fprintf(stderr,
" -l <1-9>\t\t compression level (default varies by compression type)\n");
1367 fprintf(stderr,
" -c <cipher_key>\t encipher module using supplied key\n");
1368 fprintf(stderr,
"\t\t\t\t (default no enciphering)\n");
1371 fprintf(stderr,
" -e <1|2|s>\t\t convert Unicode encoding (default: 1)\n");
1372 fprintf(stderr,
"\t\t\t\t 1 - UTF-8 ; 2 - UTF-16 ; s - SCSU\n");
1373 fprintf(stderr,
" -N\t\t\t do not normalize to NFC\n");
1375 fprintf(stderr,
"\t\t\t\t (default is to convert to UTF-8, if needed,\n");
1376 fprintf(stderr,
"\t\t\t\t and then normalize to NFC)\n");
1377 fprintf(stderr,
"\t\t\t\t Note: UTF-8 texts should be normalized to NFC.\n");
1381 fprintf(stderr,
" -s <2|4>\t\t bytes used to store entry size (default is 2).\n");
1383 fprintf(stderr,
"\t\t\t\t Note: useful for commentaries with very large\n");
1384 fprintf(stderr,
"\t\t\t\t entries in uncompressed modules\n");
1385 fprintf(stderr,
"\t\t\t\t (2 bytes to store size equal 65535 characters)\n");
1387 fprintf(stderr,
" -v <v11n>\t\t specify a versification scheme to use (default is KJV)\n");
1388 fprintf(stderr,
"\t\t\t\t Note: The following are valid values for v11n:");
1392 for (StringList::iterator loop = av11n.begin(); loop != av11n.end(); loop++) {
1393 if ((distance(av11n.begin(), loop) % 3) == 0) {
1394 fprintf(stderr,
"\n\t\t\t\t %-12s", (*loop).c_str());
1397 fprintf(stderr,
"\t%-12s", (*loop).c_str());
1400 fprintf(stderr,
"\n");
1403 fprintf(stderr,
" -d <flags>\t\t turn on debugging (default is 0)\n");
1404 fprintf(stderr,
"\t\t\t\t Note: This flag may change in the future.\n");
1405 fprintf(stderr,
"\t\t\t\t Flags: The following are valid values:\n");
1406 fprintf(stderr,
"\t\t\t\t\t0 - no debugging\n");
1407 fprintf(stderr,
"\t\t\t\t\t1 - writes to module, very verbose\n");
1408 fprintf(stderr,
"\t\t\t\t\t2 - verse start and end\n");
1409 fprintf(stderr,
"\t\t\t\t\t4 - quotes, esp. Words of Christ\n");
1410 fprintf(stderr,
"\t\t\t\t\t8 - titles\n");
1411 fprintf(stderr,
"\t\t\t\t\t16 - inter-verse material\n");
1412 fprintf(stderr,
"\t\t\t\t\t32 - BSP to BCV transformations\n");
1413 fprintf(stderr,
"\t\t\t\t\t64 - v11n exceptions\n");
1414 fprintf(stderr,
"\t\t\t\t\t128 - parsing of osisID and osisRef\n");
1415 fprintf(stderr,
"\t\t\t\t\t256 - internal stack\n");
1416 fprintf(stderr,
"\t\t\t\t\t512 - miscellaneous\n");
1417 fprintf(stderr,
"\t\t\t\t This argument can be used more than once. (Or\n");
1418 fprintf(stderr,
"\t\t\t\t the flags may be added together.)\n");
1420 fprintf(stderr,
" -h \t\t\t print verbose usage text\n");
1422 fprintf(stderr,
"\n");
1423 fprintf(stderr,
"See http://www.crosswire.org/wiki/osis2mod for more details.\n");
1424 fprintf(stderr,
"\n");
1431 CS_SEEN_STARTING_EXCLAMATION,
1432 CS_SEEN_STARTING_HYPHEN,
1434 CS_SEEN_ENDING_HYPHEN,
1435 CS_SEEN_SECOND_ENDING_HYPHEN,
1436 CS_SEEN_ENDING_GREATER_THAN
1461 bool incomment =
false;
1462 t_commentstate commentstate = CS_NOT_IN_COMMENT;
1463 bool intoken =
false;
1464 bool inWhitespace =
false;
1465 bool seeingSpace =
false;
1466 unsigned char curChar =
'\0';
1468 bool inentity =
false;
1469 t_entitytype entitytype = ET_NONE;
1470 unsigned char attrQuoteChar =
'\0';
1471 bool inattribute =
false;
1472 unsigned int linePos = 1;
1473 unsigned int charPos = 0;
1475 while (infile.good()) {
1477 int possibleChar = infile.get();
1480 if (possibleChar == -1) {
1484 curChar = (
unsigned char) possibleChar;
1488 if (curChar ==
'\n') {
1508 if (inattribute && (curChar ==
'\'' || curChar ==
'"')) {
1509 if (attrQuoteChar == curChar) {
1510 inattribute =
false;
1511 attrQuoteChar =
'\0';
1514 attrQuoteChar = curChar;
1517 if (intoken && curChar ==
'=') {
1519 attrQuoteChar =
'\0';
1522 if (!inentity && curChar ==
'&') {
1524 entitytype = ET_NONE;
1530 if (curChar ==
';') {
1534 switch (entitytype) {
1538 if (curChar ==
'x' || curChar ==
'X') {
1539 entitytype = ET_HEX;
1542 if (curChar ==
'#') {
1543 entitytype = ET_NUM;
1546 if ((curChar >=
'A' && curChar <=
'Z') ||
1547 (curChar >=
'a' && curChar <=
'z') ||
1548 (curChar >=
'0' && curChar <=
'9')) {
1549 entitytype = ET_CHAR;
1553 entitytype = ET_ERR;
1558 if (!(curChar >=
'0' && curChar <=
'9')) {
1560 entitytype = ET_ERR;
1564 if ((curChar >=
'G' && curChar <=
'Z') ||
1565 (curChar >=
'g' && curChar <=
'z')) {
1567 entitytype = ET_CHAR;
1570 if (!((curChar >=
'A' && curChar <=
'F') ||
1571 (curChar >=
'a' && curChar <=
'f') ||
1572 (curChar >=
'0' && curChar <=
'9'))) {
1574 entitytype = ET_ERR;
1578 if (!((curChar >=
'A' && curChar <=
'Z') ||
1579 (curChar >=
'a' && curChar <=
'z') ||
1580 (curChar >=
'0' && curChar <=
'9'))) {
1582 entitytype = ET_ERR;
1586 cout <<
"FATAL(ENTITY): unknown entitytype on entity end: " << entitytype << endl;
1591 if (entitytype != ET_ERR) {
1592 entityToken.append((
char) curChar);
1599 switch (entitytype) {
1603 cout <<
"WARNING(PARSE): malformed entity, replacing &" << entityToken <<
" with &" << entityToken << endl;
1605 token.append(
"&");
1606 token.append(entityToken);
1609 text.append(
"&");
1610 text.append(entityToken);
1614 if (entityToken[1] !=
'x') {
1615 cout <<
"WARNING(PARSE): HEX entity must begin with &x, found " << entityToken << endl;
1618 cout <<
"WARNING(PARSE): SWORD does not search HEX entities, found " << entityToken << endl;
1622 if (strcmp(entityToken,
"&") &&
1623 strcmp(entityToken,
"<") &&
1624 strcmp(entityToken,
">") &&
1625 strcmp(entityToken,
""") &&
1626 strcmp(entityToken,
"'")) {
1627 cout <<
"WARNING(PARSE): XML only supports 5 Character entities &, <, >, " and ', found " << entityToken << endl;
1630 if (!strcmp(entityToken,
"'")) {
1631 cout <<
"WARNING(PARSE): While valid for XML, XHTML does not support '." << endl;
1633 cout <<
"WARNING(PARSE): ' is unnecessary outside of attribute values. Replacing with '. " << endl;
1637 switch (attrQuoteChar) {
1639 cout <<
"WARNING(PARSE): ' is unnecessary inside double quoted attribute values. Replacing with '. " << endl;
1643 cout <<
"WARNING(PARSE): ' is only needed within single quoted attribute values. Considering using double quoted attribute and replacing with '." << endl;
1649 if (!strcmp(entityToken,
""")) {
1650 cout <<
"WARNING(PARSE): While valid for XML, " is only needed within double quoted attribute values" << endl;
1652 cout <<
"WARNING(PARSE): " is unnecessary outside of attribute values. Replace with \"." << endl;
1656 switch (attrQuoteChar) {
1658 cout <<
"WARNING(PARSE): " is only needed within double quoted attribute values. Considering using single quoted attribute and replacing with \"." << endl;
1661 cout <<
"WARNING(PARSE): " is unnecessary inside single quoted attribute values. Replace with \"." << endl;
1669 cout <<
"WARNING(PARSE): SWORD does not search numeric entities, found " << entityToken << endl;
1678 token.append(entityToken);
1681 text.append(entityToken);
1684 if (curChar ==
';') {
1696 if (!intoken && curChar ==
'<') {
1699 inattribute =
false;
1700 attrQuoteChar =
'\0';
1705 if (intoken && !incomment) {
1706 switch (commentstate) {
1707 case CS_NOT_IN_COMMENT :
1708 if (curChar ==
'!') {
1709 commentstate = CS_SEEN_STARTING_EXCLAMATION;
1710 token.append((
char) curChar);
1716 case CS_SEEN_STARTING_EXCLAMATION :
1717 if (curChar ==
'-') {
1718 commentstate = CS_SEEN_STARTING_HYPHEN;
1719 token.append((
char) curChar);
1722 commentstate = CS_NOT_IN_COMMENT;
1726 case CS_SEEN_STARTING_HYPHEN :
1727 if (curChar ==
'-') {
1729 commentstate = CS_IN_COMMENT;
1730 token.append((
char) curChar);
1733 cout <<
"DEBUG(COMMENTS): in comment" << endl;
1738 commentstate = CS_NOT_IN_COMMENT;
1743 cout <<
"FATAL(COMMENTS): unknown commentstate on comment start: " << commentstate << endl;
1749 switch (commentstate) {
1751 if (curChar ==
'-') {
1752 commentstate = CS_SEEN_ENDING_HYPHEN;
1759 case CS_SEEN_ENDING_HYPHEN :
1760 if (curChar ==
'-') {
1761 commentstate = CS_SEEN_SECOND_ENDING_HYPHEN;
1765 commentstate = CS_IN_COMMENT;
1769 case CS_SEEN_SECOND_ENDING_HYPHEN :
1770 if (curChar ==
'>') {
1773 commentstate = CS_NOT_IN_COMMENT;
1776 cout <<
"DEBUG(COMMENTS): out of comment" << endl;
1782 commentstate = CS_IN_COMMENT;
1787 cout <<
"FATAL(COMMENTS): unknown commentstate on comment end: " << commentstate << endl;
1794 seeingSpace = isspace(curChar)!=0;
1802 inWhitespace = seeingSpace;
1805 if (intoken && curChar ==
'>') {
1807 inWhitespace =
false;
1810 if (isalpha(token[1]) ||
1811 (((token[1] ==
'/') || (token[1] ==
'?')) && isalpha(token[2]))) {
1819 cout <<
"WARNING(PARSE): malformed token: " << token << endl;
1825 token.append((
char) curChar);
1829 case '>' : cout <<
"WARNING(PARSE): > should be >" << endl; text.append(
">");
break;
1830 case '<' : cout <<
"WARNING(PARSE): < should be <" << endl; text.append(
"<");
break;
1831 default : text.append((
char) curChar);
break;
1842 if (
converted) fprintf(stderr,
"osis2mod converted %d verses to UTF-8\n",
converted);
1852 for (
int i = 1; i < argc; i++) {
1853 if (!strcmp(argv[i],
"-h") || !strcmp(argv[i],
"--help")) {
1854 usage(*argv,
"",
true);
1865 const char* program = argv[0];
1866 const char* path = argv[1];
1867 const char* osisDoc = argv[2];
1869 SWBuf compType =
"";
1870 bool isCommentary =
false;
1873 SWBuf cipherKey =
"";
1877 for (
int i = 3; i < argc; i++) {
1878 if (!strcmp(argv[i],
"-a")) {
1881 else if (!strcmp(argv[i],
"-z")) {
1883 if (i+1 < argc && argv[i+1][0] !=
'-') {
1884 switch (argv[++i][0]) {
1885 case 'l': compType =
"LZSS";
break;
1886 case 'z': compType =
"ZIP";
break;
1887 case 'b': compType =
"BZIP2";
break;
1888 case 'x': compType =
"XZ";
break;
1892 else if (!strcmp(argv[i],
"-Z")) {
1893 if (compType.size())
usage(*argv,
"Cannot specify both -z and -Z");
1896 else if (!strcmp(argv[i],
"-b")) {
1898 iType = atoi(argv[++i]);
1899 if ((iType >= 2) && (iType <= 4))
continue;
1901 usage(*argv,
"-b requires one of <2|3|4>");
1903 else if (!strcmp(argv[i],
"-N")) {
1906 else if (!strcmp(argv[i],
"-e")) {
1908 switch (argv[++i][0]) {
1930 else if (!strcmp(argv[i],
"-c")) {
1931 if (i+1 < argc) cipherKey = argv[++i];
1932 else usage(*argv,
"-c requires <cipher_key>");
1934 else if (!strcmp(argv[i],
"-v")) {
1935 if (i+1 < argc)
v11n = argv[++i];
1936 else usage(*argv,
"-v requires <v11n>");
1938 else if (!strcmp(argv[i],
"-s")) {
1940 entrySize = atoi(argv[++i]);
1941 if (entrySize == 2 || entrySize == 4) {
1945 usage(*argv,
"-s requires one of <2|4>");
1947 else if (!strcmp(argv[i],
"-C")) {
1948 isCommentary =
true;
1950 else if (!strcmp(argv[i],
"-d")) {
1951 if (i+1 < argc)
debug |= atoi(argv[++i]);
1952 else usage(*argv,
"-d requires <flags>");
1954 else if (!strcmp(argv[i],
"-l")) {
1956 compLevel = atoi(argv[++i]);
1958 else usage(*argv,
"-l requires a value from 1-9");
1960 if (compLevel < 0 || compLevel > 10) {
1961 usage(*argv,
"-l requires a value from 1-9");
1964 else usage(*argv, (((SWBuf)
"Unknown argument: ")+ argv[i]).c_str());
1967 if (isCommentary) isCommentary =
true;
1969 if (compType ==
"LZSS") {
1972 else if (compType ==
"ZIP") {
1974 compressor =
new ZipCompress();
1976 usage(*argv,
"ERROR: SWORD library not compiled with ZIP compression support.\n\tBe sure libz is available when compiling SWORD library");
1979 else if (compType ==
"BZIP2") {
1980 #ifndef EXCLUDEBZIP2
1983 usage(*argv,
"ERROR: SWORD library not compiled with bzip2 compression support.\n\tBe sure libbz2 is available when compiling SWORD library");
1986 else if (compType ==
"XZ") {
1990 usage(*argv,
"ERROR: SWORD library not compiled with xz compression support.\n\tBe sure liblzma is available when compiling SWORD library");
1994 if (compressor && compLevel > 0) {
2001 cout <<
"WARNING(UTF8): " << program <<
" is not compiled with support for ICU. Assuming -N." << endl;
2006 cout <<
"DEBUG(ARGS):\n\tpath: " << path <<
"\n\tosisDoc: " << osisDoc <<
"\n\tcreate: " << append <<
"\n\tcompressType: " << compType <<
"\n\tblockType: " << iType <<
"\n\tcompressLevel: " << compLevel <<
"\n\tcipherKey: " << cipherKey.c_str() <<
"\n\tnormalize: " <<
normalize << endl;
2013 if (entrySize == 4) {
2015 fprintf(stderr,
"ERROR: %s: couldn't create module at path: %s \n", program, path);
2021 fprintf(stderr,
"ERROR: %s: couldn't create module at path: %s \n", program, path);
2026 else if (entrySize == 4) {
2028 fprintf(stderr,
"ERROR: %s: couldn't create module at path: %s \n", program, path);
2034 fprintf(stderr,
"ERROR: %s: couldn't create module at path: %s \n", program, path);
2042 if (entrySize == 4) {
2077 else if (entrySize == 4) {
2110 if (cipherKey.length()) {
2111 fprintf(stderr,
"Adding cipher filter with phrase: %s\n", cipherKey.c_str() );
2117 fprintf(stderr,
"The module is not writable. Writing text to it will not work.\nExiting.\n" );
2123 if (!strcmp(osisDoc,
"-")) {
2128 ifstream infile(osisDoc);
2129 if (infile.fail()) {
2130 fprintf(stderr,
"ERROR: %s: couldn't open input file: %s \n", program, osisDoc);
2139 delete cipherFilter;
2145 fprintf(stderr,
"SUCCESS: %s: has finished its work and will now rest\n", program);
void prepareSWVerseKey(SWBuf &buf)
static char createModule(const char *path, const char *v11n="KJV")
void setEmpty(bool value)
const char * setAttribute(const char *attribName, const char *attribValue, int partNum=-1, char partSplit= '|')
void makeValidRef(VerseKey &key)
void processOSIS(istream &infile)
virtual void setPosition(SW_POSITION pos)
virtual void setLevel(int l)
virtual void setEntry(const char *inbuf, long len=-1)
const char * getName() const
bool handleToken(SWBuf &text, XMLTag token)
static bool inCanonicalOSISBook
virtual char setKey(const SWKey *ikey)
bool isOSISAbbrev(const char *buf)
const StringList getAttributeNames() const
virtual void linkEntry(const SWKey *sourceKey)
int detectUTF8(const char *txt)
void prepareSWText(const char *osisID, SWBuf &text)
std::list< SWBuf > StringList
void setText(const char *tagString)
virtual bool isWritable() const
std::vector< ListKey > linkedVerses
void writeEntry(SWModule *book, SWBuf keyBuffer, SWBuf entBuffer)
virtual bool hasEntry(const SWKey *) const
static char createModule(const char *path, const char *v11n="KJV")
int getBookNumberByOSISName(const char *bookName) const
const StringList getVersificationSystems() const
virtual SWModule & addRawFilter(SWFilter *newFilter)
const int EXIT_BAD_NESTING
const int DEBUG_INTERVERSE
virtual char processText(SWBuf &text, const SWKey *key=0, const SWModule *module=0)=0
const char * getAttribute(const char *attribName, int partNum=-1, char partSplit= '|') const
static char createModule(const char *path, int blockBound, const char *v11n="KJV")
void usage(const char *app)
static SWVersion currentVersion
bool isEndTag(const char *eID=0) const
bool isValidRef(const char *buf, const char *caller)
const char * getRawEntry() const
static VersificationMgr * getSystemVersificationMgr()
static char createModule(const char *path, int blockBound, const char *v11n="KJV")
const System * getVersificationSystem(const char *name) const
XMLTag transformBSP(XMLTag t)
void linkToEntry(VerseKey &linkKey, VerseKey &dest)