[sword-svn] r1992 - in trunk: . src/mgr src/modules utilities
scribe at www.crosswire.org
scribe at www.crosswire.org
Tue Nov 14 19:56:24 MST 2006
Author: scribe
Date: 2006-11-14 19:56:23 -0700 (Tue, 14 Nov 2006)
New Revision: 1992
Modified:
trunk/src/mgr/swmgr.cpp
trunk/src/modules/swmodule.cpp
trunk/usrinst.sh
trunk/utilities/imp2gbs.cpp
Log:
Updated imp2gbs to recognize new options
Added more work toward better image path support
Added new .conf option: SearchOption=IncludeKeyInSearch
Modified: trunk/src/mgr/swmgr.cpp
===================================================================
--- trunk/src/mgr/swmgr.cpp 2006-11-11 22:11:37 UTC (rev 1991)
+++ trunk/src/mgr/swmgr.cpp 2006-11-15 02:56:23 UTC (rev 1992)
@@ -724,6 +724,10 @@
datapath = prefixPath;
if ((prefixPath[strlen(prefixPath)-1] != '\\') && (prefixPath[strlen(prefixPath)-1] != '/'))
datapath += "/";
+
+ // DataPath - relative path to data used by module driver. May be a directory, may be a File.
+ // Typically not useful by outside world. See AbsoluteDataPath, PrefixPath, and RelativePrefixPath
+ // below.
misc1 += ((entry = section.find("DataPath")) != section.end()) ? (*entry).second : (SWBuf)"";
char *buf = new char [ strlen(misc1.c_str()) + 1 ];
char *buf2 = buf;
@@ -733,6 +737,8 @@
if (!strncmp(buf2, "./", 2)) { //remove the leading ./ in the module data path to make it look better
buf2 += 2;
}
+ // PrefixPath - absolute directory path to the repository in which this module was found
+ section["PrefixPath"] = datapath;
if (*buf2)
datapath += buf2;
delete [] buf;
@@ -870,6 +876,13 @@
break;
}
}
+ SWBuf &rdp = section["RelativeDataPath"];
+ for (int i = rdp.length() - 1; i; i--) {
+ if (rdp[i] == '/') {
+ rdp.setSize(i);
+ break;
+ }
+ }
}
// if a specific module type is set in the config, use this
Modified: trunk/src/modules/swmodule.cpp
===================================================================
--- trunk/src/modules/swmodule.cpp 2006-11-11 22:11:37 UTC (rev 1991)
+++ trunk/src/modules/swmodule.cpp 2006-11-15 02:56:23 UTC (rev 1992)
@@ -929,6 +929,7 @@
SWKey textkey;
char *word = 0;
char *wordBuf = 0;
+ SWBuf c;
// turn all filters to default values
@@ -965,6 +966,7 @@
standard::StandardAnalyzer *an = new standard::StandardAnalyzer();
SWBuf target = getConfigEntry("AbsoluteDataPath");
+ bool includeKeyInSearch = getConfig().has("SearchOption", "IncludeKeyInSearch");
char ch = target.c_str()[strlen(target.c_str())-1];
if ((ch != '/') && (ch != '\\'))
target.append('/');
@@ -1077,6 +1079,13 @@
lucene_utf8towcs(wcharBuffer, keyText, MAX_CONV_SIZE); //keyText must be utf8
doc->add( *Field::UnIndexed(_T("key"), wcharBuffer ) );
+ if (includeKeyInSearch) {
+ c = keyText;
+ c += " ";
+ c += content;
+ content = c.c_str();
+ }
+
lucene_utf8towcs(wcharBuffer, content, MAX_CONV_SIZE); //content must be utf8
doc->add( *Field::UnStored(_T("content"), wcharBuffer) );
Modified: trunk/usrinst.sh
===================================================================
--- trunk/usrinst.sh 2006-11-11 22:11:37 UTC (rev 1991)
+++ trunk/usrinst.sh 2006-11-15 02:56:23 UTC (rev 1992)
@@ -5,7 +5,7 @@
OPTIONS="--without-conf $OPTIONS"
OPTIONS="--sysconfdir=/etc $OPTIONS"
#OPTIONS="--with-vcl $OPTIONS"
-#OPTIONS="--enable-debug $OPTIONS"
+OPTIONS="--enable-debug $OPTIONS"
#OPTIONS="--enable-profile $OPTIONS"
OPTIONS="--with-lucene $OPTIONS"
OPTIONS="--with-icu $OPTIONS"
Modified: trunk/utilities/imp2gbs.cpp
===================================================================
--- trunk/utilities/imp2gbs.cpp 2006-11-11 22:11:37 UTC (rev 1991)
+++ trunk/utilities/imp2gbs.cpp 2006-11-15 02:56:23 UTC (rev 1992)
@@ -1,124 +1,202 @@
#include <ctype.h>
#include <stdio.h>
-#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
-#ifndef __GNUC__
-#include <io.h>
-#else
-#include <unistd.h>
-#endif
-
#include <entriesblk.h>
#include <iostream>
#include <treekeyidx.h>
#include <rawgenbook.h>
+#include <utilstr.h>
+#include <filemgr.h>
+#include <utf8greekaccents.h>
+#include <stringmgr.h>
+//#ifdef _ICU_
+
+#include <unicode/utypes.h>
+#include <unicode/ucnv.h>
+#include <unicode/ustring.h>
+#include <unicode/uchar.h>
+
+#include <unicode/unistr.h>
+#include <unicode/translit.h>
+
+#include <unicode/locid.h>
+
+//#endif
+
#ifndef NO_SWORD_NAMESPACE
-using sword::TreeKeyIdx;
-using sword::RawGenBook;
-using sword::SWKey;
+using namespace sword;
#endif
-void printTree(TreeKeyIdx treeKey, TreeKeyIdx *target = 0, int level = 1) {
- if (!target)
- target = &treeKey;
-
- unsigned long currentOffset = target->getOffset();
- std::cout << ((currentOffset == treeKey.getOffset()) ? "==>" : "");
- for (int i = 0; i < level; i++) std::cout << "\t";
- std::cout << treeKey.getLocalName() << "/\n";
- if (treeKey.firstChild()) {
- printTree(treeKey, target, level+1);
- treeKey.parent();
- }
- if (treeKey.nextSibling())
- printTree(treeKey, target, level);
+SWBuf outPath;
+SWBuf inFile;
+bool toUpper = false;
+bool greekFilter = false;
+int lexLevels = 0;
+UTF8GreekAccents greekAccentsFilter;
+
+
+void usage(const char *app) {
+ fprintf(stderr, "imp2gbs 1.0 General Book module creation tool for the SWORD Project\n\n");
+ fprintf(stderr, "usage: %s <inFile> [OPTIONS]\n", app);
+ fprintf(stderr, "\t-o <outPath>\n\t\tSpecify an output Path other than inFile location.\n");
+ fprintf(stderr, "\t-U\n\t\tKey filter: Convert toUpper\n");
+ fprintf(stderr, "\t-g\n\t\tKey filter: Strip Greek diacritics\n");
+ fprintf(stderr, "\t-l <levels>\n\t\tKey filter: Pseudo-Lexicon n-level generation using first character\n");
+ fprintf(stderr, "\t\te.g. -l 2 \"Abbey\" -> \"A/AB/Abbey\"\n");
+ fprintf(stderr, "\n");
+ exit (-1);
}
-int readline(FILE* infile, char* lineBuffer) {
- signed char c;
- char* lbPtr = lineBuffer;
- while ((c = fgetc(infile)) != EOF) {
- *lbPtr++ = c;
- if (c == 10) {
- *lbPtr = 0;
- return (strlen(lineBuffer));
- }
- }
- return 0;
+
+void parseParams(int argc, char **argv) {
+
+ if (argc < 2) {
+ usage(*argv);
+ }
+
+ inFile = argv[1];
+
+ for (int i = 2; i < argc; i++) {
+ if (!strcmp(argv[i], "-o")) {
+ if ((i+1 < argc) && (argv[i+1][0] != '-')) {
+ outPath = argv[i+1];
+ i++;
+ }
+ else usage(*argv);
+ }
+ else if (!strcmp(argv[i], "-U")) {
+ if (StringMgr::hasUTF8Support()) {
+ toUpper = true;
+ }
+ else {
+ fprintf(stderr, "Error: %s. Cannot reliably toUpper without UTF8 support\n\t(recompile with ICU enabled)\n\n", *argv);
+ usage(*argv);
+ }
+ }
+ else if (!strcmp(argv[i], "-g")) {
+ greekFilter = true;
+ }
+ else if (!strcmp(argv[i], "-l")) {
+ if (i+1 < argc) {
+ lexLevels = atoi(argv[i+1]);
+ i++;
+ }
+ if (!lexLevels) usage(*argv);
+ }
+ }
+ if (!outPath.size()) {
+ outPath = inFile;
+ int i;
+ for (i = 0; (i < outPath.size() && outPath[i] != '.'); i++);
+ outPath.size(i);
+ }
}
+
+void writeEntry(SWModule *book, SWBuf keyBuffer, SWBuf entBuffer) {
+
+
+ if (greekFilter) {
+ greekAccentsFilter.processText(keyBuffer);
+ }
+
+ if (toUpper) {
+ unsigned size = (keyBuffer.size()+5)*3;
+ keyBuffer.setFillByte(0);
+ keyBuffer.resize(size);
+ StringMgr::getSystemStringMgr()->upperUTF8(keyBuffer.getRawData(), size-2);
+ }
+
+//#ifdef _ICU_
+ if (lexLevels) {
+ unsigned size = (keyBuffer.size()+(lexLevels*2));
+ keyBuffer.setFillByte(0);
+ keyBuffer.resize(size);
+
+ UErrorCode err = U_ZERO_ERROR;
+
+ int max = (size+5)*3;
+ UChar *ubuffer = new UChar[max+10];
+ int32_t len;
+
+ u_strFromUTF8(ubuffer, max+9, &len, keyBuffer.c_str(), -1, &err);
+ if (err == U_ZERO_ERROR) {
+ int totalShift = 0;
+ for (int i = lexLevels; i; i--) {
+ int shift = (i < len)? i : len;
+ memmove(ubuffer+(shift+1), ubuffer, (max-shift)*sizeof(UChar));
+ ubuffer[shift] = '/';
+ totalShift += (shift+1);
+ }
+ UChar *upper = new UChar[(totalShift+1)*3];
+ memcpy(upper, ubuffer, totalShift*sizeof(UChar));
+ upper[totalShift] = 0;
+ len = u_strToUpper(upper, (totalShift+1)*3, upper, -1, 0, &err);
+ memmove(ubuffer+len, ubuffer+totalShift, (max-totalShift)*sizeof(UChar));
+ memcpy(ubuffer, upper, len*sizeof(UChar));
+ delete [] upper;
+ u_strToUTF8(keyBuffer.getRawData(), max, 0, ubuffer, -1, &err);
+ }
+
+ delete [] ubuffer;
+ }
+//#endif
+
+ std::cout << keyBuffer << std::endl;
+ book->setKey(keyBuffer.c_str());
+ book->setEntry(entBuffer, strlen(entBuffer));
+}
+
+
int main(int argc, char **argv) {
+ greekAccentsFilter.setOptionValue("Off"); // off = accents off
+ parseParams(argc, argv);
- const char * helptext ="imp2gbs 1.0 General Book module creation tool for the SWORD Project\n usage:\n %s <filename> [modname]\n";
+ // Let's see if we can open our input file
+ FileDesc *fd = FileMgr::getSystemFileMgr()->open(inFile, FileMgr::RDONLY);
+ if (fd->getFd() < 0) {
+ fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], inFile.c_str());
+ exit(-2);
+ }
- signed long i = 0;
- char* keyBuffer = new char[2048];
- char* entBuffer = new char[1048576];
- char* lineBuffer = new char[1048576];
- char modname[16];
+ RawGenBook *book;
- if (argc > 2) {
- strcpy (modname, argv[2]);
- }
- else if (argc > 1) {
- for (i = 0; (i < 16) && (argv[1][i]) && (argv[1][i] != '.'); i++) {
- modname[i] = argv[1][i];
- }
- modname[i] = 0;
- }
- else {
- fprintf(stderr, helptext, argv[0]);
- exit(-1);
- }
+ // Do some initialization stuff
+ RawGenBook::createModule(outPath);
+ book = new RawGenBook(outPath);
- FILE *infile;
- infile = fopen(argv[1], "r");
-
- TreeKeyIdx * treeKey;
- RawGenBook *book;
-
- // Do some initialization stuff
- TreeKeyIdx::create(modname);
- treeKey = new TreeKeyIdx(modname);
- RawGenBook::createModule(modname);
- delete treeKey;
- book = new RawGenBook(modname);
- //DEBUG TreeKeyIdx root = *((TreeKeyIdx *)((SWKey *)(*book)));
-
- while (readline(infile, lineBuffer)) {
- if (!strncmp(lineBuffer, "$$$", 3)) {
- if (strlen(keyBuffer) && strlen(entBuffer)) {
- std::cout << keyBuffer << std::endl;
- book->setKey(keyBuffer);
- book->setEntry(entBuffer, strlen(entBuffer));
- }
- lineBuffer[strlen(lineBuffer) - 1] = 0;
- strcpy (keyBuffer, lineBuffer + 3);
- *entBuffer = 0;
- }
- else {
- strcat (entBuffer, lineBuffer);
- }
- }
+ SWBuf lineBuffer;
+ SWBuf keyBuffer;
+ SWBuf entBuffer;
- //handle final entry
- if (strlen(keyBuffer) && strlen(entBuffer)) {
- std::cout << keyBuffer << std::endl;
- book->setKey(keyBuffer);
- book->setEntry(entBuffer, strlen(entBuffer));
- }
-
- //DEBUG printTree(root, treeKey);
-
- delete book;
- delete [] keyBuffer;
- delete [] lineBuffer;
- delete [] entBuffer;
+ bool more = true;
+ do {
+ more = FileMgr::getLine(fd, lineBuffer);
+ if (lineBuffer.startsWith("$$$")) {
+ if ((keyBuffer.size()) && (entBuffer.size())) {
+ writeEntry(book, keyBuffer, entBuffer);
+ }
+ keyBuffer = lineBuffer;
+ keyBuffer << 3;
+ keyBuffer.trim();
+ entBuffer.size(0);
+ }
+ else {
+ if (keyBuffer.size()) {
+ entBuffer += lineBuffer;
+ entBuffer += "\n";
+ }
+ }
+ } while (more);
- return 0;
+ delete book;
+
+ FileMgr::getSystemFileMgr()->close(fd);
+
+ return 0;
}
More information about the sword-cvs
mailing list