53 #pragma warning( disable: 4251 )
83 #ifndef NO_SWORD_NAMESPACE
84 using namespace sword;
132 unsigned int countUTF8 = 0;
136 const unsigned char *p = (
const unsigned char*) txt;
143 unsigned char i = *p;
144 for (count = 0; i & 0x80; count++) {
154 if (count < 2 || count > 4)
return 0;
158 while (--count && *++p) {
161 if ((0xc0 & *p) != 0x80)
return 0;
176 return countUTF8 ? 1 : -1;
186 cout <<
"Warning: " << key <<
": Converting to UTF-8 (" << text <<
")" << endl;
187 converter.processText(text, (SWKey *)2);
198 cout <<
"Error: " << key <<
": Converting to UTF-8 (" << text <<
")" << endl;
203 normalizer->processText(text, (SWKey *)2);
204 if (before != text) {
214 cout <<
"(" <<
entryCount <<
") " << key << endl;
225 SWKey tmpkey = linkBuf.c_str();
228 cout <<
"(" <<
entryCount <<
") " <<
"Linking: " << linkBuf << endl;
239 static bool inEntry =
false;
240 static bool inEntryFree =
false;
241 static bool inSuperEntry =
false;
243 const char *tokenName = token->
getName();
245 static const char *splitPtr, *splitPtr2 =
NULL;
246 static char *splitBuffer =
new char[4096];
252 if (!inEntry && !inEntryFree && !inSuperEntry) {
253 inEntry = !strcmp(tokenName,
"entry");
254 inEntryFree = !strcmp(tokenName,
"entryFree");
255 inSuperEntry = !strcmp(tokenName,
"superentry");
256 if (inEntry || inEntryFree || inSuperEntry) {
258 cout <<
"Entering " << tokenName << endl;
281 if ((inEntry && !strcmp(tokenName,
"entry" )) ||
282 (inEntryFree && !strcmp(tokenName,
"entryFree" )) ||
283 (inSuperEntry && !strcmp(tokenName,
"superentry"))) {
285 cout <<
"Leaving " << tokenName << endl;
291 inSuperEntry =
false;
296 cout <<
"keyStr: " <<
keyStr << endl;
298 splitPtr = strstr(
keyStr,
"|");
300 strncpy (splitBuffer,
keyStr.c_str(), splitPtr -
keyStr.c_str());
301 splitBuffer[splitPtr -
keyStr.c_str()] = 0;
304 cout <<
"splitBuffer: " << splitBuffer << endl;
305 cout <<
"currentKey: " << *
currentKey << endl;
311 splitPtr2 = strstr(splitPtr,
"|");
314 strncpy (splitBuffer, splitPtr, splitPtr2 - splitPtr);
315 splitBuffer[splitPtr2 - splitPtr] = 0;
317 cout <<
"splitBuffer: " << splitBuffer << endl;
318 cout <<
"currentKey: " << *
currentKey << endl;
321 splitPtr = splitPtr2;
324 strcpy (splitBuffer, splitPtr);
326 cout <<
"splitBuffer: " << splitBuffer << endl;
327 cout <<
"currentKey: " << *
currentKey << endl;
349 void usage(
const char *app,
const char *error = 0) {
351 if (error) fprintf(stderr,
"\n%s: %s\n", app, error);
353 fprintf(stderr,
"TEI Lexicon/Dictionary/Daily Devotional/Glossary module creation tool for\n\tThe SWORD Project\n");
354 fprintf(stderr,
"\nusage: %s <output/path> <teiDoc> [OPTIONS]\n", app);
355 fprintf(stderr,
" -z <l|z|b|x>\t\t use compression (default: none)\n");
356 fprintf(stderr,
"\t\t\t\t l - LZSS; z - ZIP; b - bzip2; x - xz\n");
357 fprintf(stderr,
" -s <2|4>\t\t max text size per entry (default: 4)\n");
358 fprintf(stderr,
" -c <cipher_key>\t encipher module using supplied key\n");
359 fprintf(stderr,
"\t\t\t\t (default: none)\n");
360 fprintf(stderr,
" -N\t\t\t Do not convert UTF-8 or normalize UTF-8 to NFC\n");
361 fprintf(stderr,
"\t\t\t\t (default is to convert to UTF-8, if needed,\n");
362 fprintf(stderr,
"\t\t\t\t and then normalize to NFC. Note: all UTF-8\n");
363 fprintf(stderr,
"\t\t\t\t texts should be normalized to NFC.)\n");
364 fprintf(stderr,
"\n\tThe options -z and -s are mutually exclusive.\n");
368 int main(
int argc,
char **argv) {
371 normalizer = &normalizr;
374 SWBuf program = argv[0];
375 fprintf(stderr,
"You are running %s: $Rev: 3416 $\n", argv[0]);
383 SWBuf path = argv[1];
384 SWBuf teiDoc = argv[2];
387 SWBuf recommendedPath =
"./modules/lexdict/";
388 SWBuf cipherKey =
"";
391 for (
int i = 3; i < argc; i++) {
392 if (!strcmp(argv[i],
"-z")) {
393 if (modDrv.size())
usage(*argv,
"Cannot specify both -z and -s");
395 if (i+1 < argc && argv[i+1][0] !=
'-') {
396 switch (argv[++i][0]) {
397 case 'l': compType =
"LZSS";
break;
398 case 'z': compType =
"ZIP";
break;
399 case 'b': compType =
"BZIP2";
break;
400 case 'x': compType =
"XZ";
break;
404 recommendedPath +=
"zld/";
406 else if (!strcmp(argv[i],
"-Z")) {
407 if (compType.size())
usage(*argv,
"Cannot specify both -z and -Z");
408 if (modDrv.size())
usage(*argv,
"Cannot specify both -Z and -s");
411 recommendedPath +=
"zld/";
413 else if (!strcmp(argv[i],
"-s")) {
414 if (compType.size())
usage(*argv,
"Cannot specify both -s and -z");
416 int size = atoi(argv[++i]);
419 recommendedPath +=
"rawld/";
424 recommendedPath +=
"rawld4/";
428 usage(*argv,
"-s requires one of <2|4>");
430 else if (!strcmp(argv[i],
"-N")) {
433 else if (!strcmp(argv[i],
"-c")) {
434 if (i+1 < argc) cipherKey = argv[++i];
435 else usage(*argv,
"-c requires <cipher_key>");
437 else usage(*argv, (((SWBuf)
"Unknown argument: ")+ argv[i]).c_str());
439 if (!modDrv.size()) {
441 recommendedPath +=
"rawld4/";
447 cout << program <<
" is not compiled with support for ICU. Setting -N flag." << endl;
451 if (compType ==
"LZSS") {
454 else if (compType ==
"ZIP") {
456 compressor =
new ZipCompress();
458 usage(*argv,
"ERROR: SWORD library not compiled with ZIP compression support.\n\tBe sure libz is available when compiling SWORD library");
461 else if (compType ==
"BZIP2") {
465 usage(*argv,
"ERROR: SWORD library not compiled with bzip2 compression support.\n\tBe sure libbz2 is available when compiling SWORD library");
468 else if (compType ==
"XZ") {
472 usage(*argv,
"ERROR: SWORD library not compiled with xz compression support.\n\tBe sure liblzma is available when compiling SWORD library");
478 cout <<
"path: " << path <<
" teiDoc: " << teiDoc <<
" compressType: " << compType <<
" ldType: " << modDrv <<
" normalize: " <<
normalize <<
"\n";
483 SWBuf modName = path;
484 int pathlen = path.length();
485 char lastChar = path[pathlen - 1];
486 if (lastChar !=
'/' && lastChar !=
'\\') {
494 vector<string> linkBuf;
496 if (modDrv ==
"zLD") {
498 fprintf(stderr,
"error: %s: couldn't create module at path: %s \n", program.c_str(), modName.c_str());
501 module =
new zLD(modName, 0, 0, 30, compressor);
503 else if (modDrv ==
"RawLD") {
505 fprintf(stderr,
"error: %s: couldn't create module at path: %s \n", program.c_str(), modName.c_str());
508 module =
new RawLD(modName);
512 fprintf(stderr,
"error: %s: couldn't create module at path: %s \n", program.c_str(), modName.c_str());
515 module =
new RawLD4(modName);
520 if (cipherKey.size()) {
521 fprintf(stderr,
"Adding cipher filter with phrase: %s\n", cipherKey.c_str() );
527 fprintf(stderr,
"The module is not writable. Writing text to it will not work.\nExiting.\n" );
532 ifstream infile(teiDoc);
534 fprintf(stderr,
"error: %s: couldn't open input file: %s \n", program.c_str(), teiDoc.c_str());
546 bool intoken =
false;
549 while (infile.good()) {
551 curChar = infile.get();
558 if (!intoken && curChar ==
'<') {
564 if (intoken && curChar ==
'>') {
577 token.append(curChar);
580 case '>' : text.append(
">");
break;
581 case '<' : text.append(
"<");
break;
582 default : text.append(curChar);
break;
614 SWBuf suggestedModuleName = path;
615 if (lastChar ==
'/' || lastChar ==
'\\') {
616 suggestedModuleName.setSize(--pathlen);
619 lastChar = suggestedModuleName[pathlen - 1];
620 if (lastChar ==
'.') {
621 suggestedModuleName =
"???";
628 const char *
m = strrchr(suggestedModuleName.c_str(),
'/');
630 m = strrchr(suggestedModuleName.c_str(),
'\\');
633 suggestedModuleName = m+1;
637 recommendedPath += suggestedModuleName;
638 recommendedPath +=
"/dict";
640 fprintf(stderr,
"\nSuggested conf (replace ??? with appropriate values)\n\n");
641 fprintf(stderr,
"[%s]\n", suggestedModuleName.c_str());
642 fprintf(stderr,
"DataPath=%s\n", recommendedPath.c_str());
643 fprintf(stderr,
"Description=???\n");
644 fprintf(stderr,
"SourceType=TEI\n");
645 fprintf(stderr,
"Encoding=%s\n", (normalize ?
"UTF-8" :
"???"));
646 fprintf(stderr,
"ModDrv=%s\n", modDrv.c_str());
648 fprintf(stderr,
"CompressType=%s\n", compType.c_str());
650 if (cipherKey.size()) {
651 fprintf(stderr,
"CipherKey=%s\n", cipherKey.c_str());
virtual SWKey * createKey() const
void normalizeInput(SWKey &key, SWBuf &text)
virtual void setEntry(const char *inbuf, long len=-1)
const char * getName() const
bool handleToken(SWBuf &text, XMLTag token)
virtual char setKey(const SWKey *ikey)
const char * toString() const
static char createModule(const char *path)
virtual void linkEntry(const SWKey *sourceKey)
int detectUTF8(const char *txt)
static char createModule(const char *path)
virtual bool isWritable() const
void writeEntry(SWModule *book, SWBuf keyBuffer, SWBuf entBuffer)
virtual SWModule & addRawFilter(SWFilter *newFilter)
const char * getAttribute(const char *attribName, int partNum=-1, char partSplit= '|') const
void usage(const char *app)
bool isEndTag(const char *eID=0) const
static char createModule(const char *path)
void linkToEntry(VerseKey &linkKey, VerseKey &dest)