[sword-svn] r1992 - in trunk: . src/mgr src/modules utilities

scribe at www.crosswire.org scribe at www.crosswire.org
Tue Nov 14 19:56:24 MST 2006


Author: scribe
Date: 2006-11-14 19:56:23 -0700 (Tue, 14 Nov 2006)
New Revision: 1992

Modified:
   trunk/src/mgr/swmgr.cpp
   trunk/src/modules/swmodule.cpp
   trunk/usrinst.sh
   trunk/utilities/imp2gbs.cpp
Log:
Updated imp2gbs to recognize new options
Added more work toward better image path support
Added new .conf option: SearchOption=IncludeKeyInSearch


Modified: trunk/src/mgr/swmgr.cpp
===================================================================
--- trunk/src/mgr/swmgr.cpp	2006-11-11 22:11:37 UTC (rev 1991)
+++ trunk/src/mgr/swmgr.cpp	2006-11-15 02:56:23 UTC (rev 1992)
@@ -724,6 +724,10 @@
 	datapath = prefixPath;
 	if ((prefixPath[strlen(prefixPath)-1] != '\\') && (prefixPath[strlen(prefixPath)-1] != '/'))
 		datapath += "/";
+
+	// DataPath - relative path to data used by module driver.  May be a directory, may be a file.
+	//   Typically not useful to the outside world.  See AbsoluteDataPath, PrefixPath, and RelativePrefixPath
+	//   below.
 	misc1 += ((entry = section.find("DataPath")) != section.end()) ? (*entry).second : (SWBuf)"";
 	char *buf = new char [ strlen(misc1.c_str()) + 1 ];
 	char *buf2 = buf;
@@ -733,6 +737,8 @@
 	if (!strncmp(buf2, "./", 2)) { //remove the leading ./ in the module data path to make it look better
 		buf2 += 2;
 	}
+	// PrefixPath - absolute directory path to the repository in which this module was found
+	section["PrefixPath"] = datapath;
 	if (*buf2)
 		datapath += buf2;
 	delete [] buf;
@@ -870,6 +876,13 @@
 				break;
 			}
 		}
+		SWBuf &rdp = section["RelativeDataPath"];
+		for (int i = rdp.length() - 1; i; i--) {
+			if (rdp[i] == '/') {
+				rdp.setSize(i);
+				break;
+			}
+		}
 	}
 
 	// if a specific module type is set in the config, use this

Modified: trunk/src/modules/swmodule.cpp
===================================================================
--- trunk/src/modules/swmodule.cpp	2006-11-11 22:11:37 UTC (rev 1991)
+++ trunk/src/modules/swmodule.cpp	2006-11-15 02:56:23 UTC (rev 1992)
@@ -929,6 +929,7 @@
 	SWKey textkey;
 	char *word = 0;
 	char *wordBuf = 0;
+	SWBuf c;
 
 
 	// turn all filters to default values
@@ -965,6 +966,7 @@
  
 	standard::StandardAnalyzer *an = new standard::StandardAnalyzer();
 	SWBuf target = getConfigEntry("AbsoluteDataPath");
+	bool includeKeyInSearch = getConfig().has("SearchOption", "IncludeKeyInSearch");
 	char ch = target.c_str()[strlen(target.c_str())-1];
 	if ((ch != '/') && (ch != '\\'))
 		target.append('/');
@@ -1077,6 +1079,13 @@
 			lucene_utf8towcs(wcharBuffer, keyText, MAX_CONV_SIZE); //keyText must be utf8
 			doc->add( *Field::UnIndexed(_T("key"), wcharBuffer ) );
 
+			if (includeKeyInSearch) {
+				c = keyText;
+				c += " ";
+				c += content;
+				content = c.c_str();
+			}
+
 			lucene_utf8towcs(wcharBuffer, content, MAX_CONV_SIZE); //content must be utf8
 			doc->add( *Field::UnStored(_T("content"), wcharBuffer) );
 

Modified: trunk/usrinst.sh
===================================================================
--- trunk/usrinst.sh	2006-11-11 22:11:37 UTC (rev 1991)
+++ trunk/usrinst.sh	2006-11-15 02:56:23 UTC (rev 1992)
@@ -5,7 +5,7 @@
 OPTIONS="--without-conf $OPTIONS"
 OPTIONS="--sysconfdir=/etc $OPTIONS"
 #OPTIONS="--with-vcl $OPTIONS"
-#OPTIONS="--enable-debug $OPTIONS"
+OPTIONS="--enable-debug $OPTIONS"
 #OPTIONS="--enable-profile $OPTIONS"
 OPTIONS="--with-lucene $OPTIONS"
 OPTIONS="--with-icu $OPTIONS"

Modified: trunk/utilities/imp2gbs.cpp
===================================================================
--- trunk/utilities/imp2gbs.cpp	2006-11-11 22:11:37 UTC (rev 1991)
+++ trunk/utilities/imp2gbs.cpp	2006-11-15 02:56:23 UTC (rev 1992)
@@ -1,124 +1,202 @@
 #include <ctype.h>
 #include <stdio.h>
-#include <fcntl.h>
 #include <errno.h>
 #include <stdlib.h>
 
-#ifndef __GNUC__
-#include <io.h>
-#else
-#include <unistd.h>
-#endif
-
 #include <entriesblk.h>
 #include <iostream>
 #include <treekeyidx.h>
 #include <rawgenbook.h>
+#include <utilstr.h>
+#include <filemgr.h>
+#include <utf8greekaccents.h>
+#include <stringmgr.h>
 
+//#ifdef _ICU_
+
+#include <unicode/utypes.h>
+#include <unicode/ucnv.h>
+#include <unicode/ustring.h>
+#include <unicode/uchar.h>
+
+#include <unicode/unistr.h>
+#include <unicode/translit.h>
+
+#include <unicode/locid.h>
+
+//#endif
+
 #ifndef NO_SWORD_NAMESPACE
-using sword::TreeKeyIdx;
-using sword::RawGenBook;
-using sword::SWKey;
+using namespace sword;
 #endif
 
-void printTree(TreeKeyIdx treeKey, TreeKeyIdx *target = 0, int level = 1) {
-  if (!target)
-    target = &treeKey;
-  
-  unsigned long currentOffset = target->getOffset();
-  std::cout << ((currentOffset == treeKey.getOffset()) ? "==>" : "");
-  for (int i = 0; i < level; i++) std::cout << "\t";
-  std::cout << treeKey.getLocalName() << "/\n";
-  if (treeKey.firstChild()) {
-    printTree(treeKey, target, level+1);
-    treeKey.parent();
-  }
-  if (treeKey.nextSibling())
-    printTree(treeKey, target, level);
+SWBuf outPath;
+SWBuf inFile;
+bool  toUpper     = false;
+bool  greekFilter = false;
+int   lexLevels   = 0;
+UTF8GreekAccents greekAccentsFilter;
+
+
+void usage(const char *app) {
+	fprintf(stderr, "imp2gbs 1.0 General Book module creation tool for the SWORD Project\n\n");
+	fprintf(stderr, "usage: %s <inFile> [OPTIONS]\n", app);
+	fprintf(stderr, "\t-o <outPath>\n\t\tSpecify an output Path other than inFile location.\n");
+	fprintf(stderr, "\t-U\n\t\tKey filter: Convert toUpper\n");
+	fprintf(stderr, "\t-g\n\t\tKey filter: Strip Greek diacritics\n");
+	fprintf(stderr, "\t-l <levels>\n\t\tKey filter: Pseudo-Lexicon n-level generation using first character\n");
+	fprintf(stderr, "\t\te.g. -l 2 \"Abbey\" -> \"A/AB/Abbey\"\n");
+	fprintf(stderr, "\n");
+	exit (-1);
 }
 
-int readline(FILE* infile, char* lineBuffer) {
-  signed char c;
-  char* lbPtr = lineBuffer;
-  while ((c = fgetc(infile)) != EOF) {
-    *lbPtr++ = c;
-    if (c == 10) {
-      *lbPtr = 0;
-      return (strlen(lineBuffer));
-    }
-  }
-  return 0;
+
+void parseParams(int argc, char **argv) {
+
+	if (argc < 2) {
+		usage(*argv);
+	}
+
+	inFile = argv[1];
+
+	for (int i = 2; i < argc; i++) {
+		if (!strcmp(argv[i], "-o")) {
+			if ((i+1 < argc) && (argv[i+1][0] != '-')) {
+				outPath = argv[i+1];
+				i++;
+			}
+			else usage(*argv);
+		}
+		else if (!strcmp(argv[i], "-U")) {
+			if (StringMgr::hasUTF8Support()) {
+				toUpper = true;
+			}
+			else {
+				fprintf(stderr, "Error: %s.  Cannot reliably toUpper without UTF8 support\n\t(recompile with ICU enabled)\n\n", *argv);
+				usage(*argv);
+			}
+		}
+		else if (!strcmp(argv[i], "-g")) {
+			greekFilter = true;
+		}
+		else if (!strcmp(argv[i], "-l")) {
+			if (i+1 < argc) {
+				lexLevels = atoi(argv[i+1]);
+				i++;
+			}
+			if (!lexLevels) usage(*argv);
+		}
+	}
+	if (!outPath.size()) {
+		outPath = inFile;
+		int i;
+		for (i = 0; (i < outPath.size() && outPath[i] != '.'); i++);
+		outPath.size(i);
+	}
 }
+  
 
+void writeEntry(SWModule *book, SWBuf keyBuffer, SWBuf entBuffer) {
+
+
+	if (greekFilter) {
+		greekAccentsFilter.processText(keyBuffer);
+	}
+
+	if (toUpper) {
+		unsigned size = (keyBuffer.size()+5)*3;
+		keyBuffer.setFillByte(0);
+		keyBuffer.resize(size);
+		StringMgr::getSystemStringMgr()->upperUTF8(keyBuffer.getRawData(), size-2);
+	}
+
+//#ifdef _ICU_
+	if (lexLevels) {
+		unsigned size = (keyBuffer.size()+(lexLevels*2));
+		keyBuffer.setFillByte(0);
+		keyBuffer.resize(size);
+			
+		UErrorCode err = U_ZERO_ERROR;
+		
+		int max = (size+5)*3;
+		UChar *ubuffer = new UChar[max+10];
+		int32_t len;
+		
+		u_strFromUTF8(ubuffer, max+9, &len, keyBuffer.c_str(), -1, &err);
+		if (err == U_ZERO_ERROR) {
+			int totalShift = 0;
+			for (int i = lexLevels; i; i--) {
+				int shift = (i < len)? i : len;
+				memmove(ubuffer+(shift+1), ubuffer, (max-shift)*sizeof(UChar));
+				ubuffer[shift] = '/';
+				totalShift += (shift+1);
+			}
+			UChar *upper = new UChar[(totalShift+1)*3];
+			memcpy(upper, ubuffer, totalShift*sizeof(UChar));
+			upper[totalShift] = 0;
+			len = u_strToUpper(upper, (totalShift+1)*3, upper, -1, 0, &err);
+			memmove(ubuffer+len, ubuffer+totalShift, (max-totalShift)*sizeof(UChar));
+			memcpy(ubuffer, upper, len*sizeof(UChar));
+			delete [] upper;
+			u_strToUTF8(keyBuffer.getRawData(), max, 0, ubuffer, -1, &err);
+		}
+		
+		delete [] ubuffer;
+	}
+//#endif
+
+	std::cout << keyBuffer << std::endl;
+	book->setKey(keyBuffer.c_str());
+	book->setEntry(entBuffer, strlen(entBuffer));
+}
+
+
 int main(int argc, char **argv) {
+	greekAccentsFilter.setOptionValue("Off");		// off = accents off
+	parseParams(argc, argv);
   
-  const char * helptext ="imp2gbs 1.0 General Book module creation tool for the SWORD Project\n  usage:\n   %s <filename> [modname]\n";
+	// Let's see if we can open our input file
+	FileDesc *fd = FileMgr::getSystemFileMgr()->open(inFile, FileMgr::RDONLY);
+	if (fd->getFd() < 0) {
+		fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], inFile.c_str());
+		exit(-2);
+	}
   
-  signed long i = 0;
-  char* keyBuffer = new char[2048];
-  char* entBuffer = new char[1048576];
-  char* lineBuffer = new char[1048576];
-  char modname[16];
+	RawGenBook *book;
   
-  if (argc > 2) {
-    strcpy (modname, argv[2]);
-  }
-  else if (argc > 1) {
-    for (i = 0; (i < 16) && (argv[1][i]) && (argv[1][i] != '.'); i++) {
-      modname[i] = argv[1][i];
-    }
-    modname[i] = 0;
-  }
-  else {
-    fprintf(stderr, helptext, argv[0]);
-    exit(-1);
-  }
+	// Do some initialization stuff
+	RawGenBook::createModule(outPath);
+	book = new RawGenBook(outPath);
   
-  FILE *infile;
-  infile = fopen(argv[1], "r");
-  
-  TreeKeyIdx * treeKey;
-  RawGenBook *book;
-  
-  // Do some initialization stuff
-  TreeKeyIdx::create(modname);
-  treeKey = new TreeKeyIdx(modname);
-  RawGenBook::createModule(modname);
-  delete treeKey;
-  book = new RawGenBook(modname);
-  //DEBUG  TreeKeyIdx root = *((TreeKeyIdx *)((SWKey *)(*book)));
-  
-  while (readline(infile, lineBuffer)) {
-    if (!strncmp(lineBuffer, "$$$", 3)) {
-      if (strlen(keyBuffer) && strlen(entBuffer)) {
-	std::cout << keyBuffer << std::endl;
-	book->setKey(keyBuffer);
-	book->setEntry(entBuffer, strlen(entBuffer));
-      }
-      lineBuffer[strlen(lineBuffer) - 1] = 0;
-      strcpy (keyBuffer, lineBuffer + 3);
-      *entBuffer = 0;
-    }
-    else {
-      strcat (entBuffer, lineBuffer);
-    }
-  }
+	SWBuf lineBuffer;
+	SWBuf keyBuffer;
+	SWBuf entBuffer;
 
-  //handle final entry
-  if (strlen(keyBuffer) && strlen(entBuffer)) {
-    std::cout << keyBuffer << std::endl;
-    book->setKey(keyBuffer);
-    book->setEntry(entBuffer, strlen(entBuffer));
-  }
-  
-  //DEBUG  printTree(root, treeKey);
-  
-  delete book;
-  delete [] keyBuffer;
-  delete [] lineBuffer;
-  delete [] entBuffer;
+	bool more = true;
+	do {
+		more = FileMgr::getLine(fd, lineBuffer);
+		if (lineBuffer.startsWith("$$$")) {
+			if ((keyBuffer.size()) && (entBuffer.size())) {
+				writeEntry(book, keyBuffer, entBuffer);
+			}
+			keyBuffer = lineBuffer;
+			keyBuffer << 3;
+			keyBuffer.trim();
+			entBuffer.size(0);
+		}
+		else {
+			if (keyBuffer.size()) {
+				entBuffer += lineBuffer;
+				entBuffer += "\n";
+			}
+		}
+	} while (more);
 
-  return 0;
+	delete book;
+
+	FileMgr::getSystemFileMgr()->close(fd);
+
+	return 0;
 }
 
 




More information about the sword-cvs mailing list