[sword-svn] r2136 - in trunk: src/modules/filters utilities

chrislit at www.crosswire.org chrislit at www.crosswire.org
Sun Feb 24 02:57:46 MST 2008


Author: chrislit
Date: 2008-02-24 02:57:46 -0700 (Sun, 24 Feb 2008)
New Revision: 2136

Modified:
   trunk/src/modules/filters/utf8nfc.cpp
   trunk/utilities/osis2mod.cpp
Log:
committed NFC patches from DM (We can use this as a basis if further tweaking is necessary.)

Modified: trunk/src/modules/filters/utf8nfc.cpp
===================================================================
--- trunk/src/modules/filters/utf8nfc.cpp	2008-02-05 01:37:25 UTC (rev 2135)
+++ trunk/src/modules/filters/utf8nfc.cpp	2008-02-24 09:57:46 UTC (rev 2136)
@@ -9,6 +9,9 @@
 #include <stdlib.h>
 
 #include <utilstr.h>
+#include <unicode/unistr.h>
+#include <unicode/normlzr.h>
+#include <unicode/unorm.h>
 
 #include <utf8nfc.h>
 #include <swbuf.h>
@@ -25,26 +28,21 @@
 
 char UTF8NFC::processText(SWBuf &text, const SWKey *key, const SWModule *module)
 {
-	 if ((unsigned long)key < 2)	// hack, we're en(1)/de(0)ciphering
+	if ((unsigned long)key < 2)	// hack, we're en(1)/de(0)ciphering
 		return -1;
         
-	int32_t len = text.length() * 2;
-        source = new UChar[len + 1]; //each char could become a surrogate pair
+	UErrorCode status = U_ZERO_ERROR;
+	UnicodeString source(text.getRawData(), text.length(), conv, status);
+	UnicodeString target;
 
-	// Convert UTF-8 string to UTF-16 (UChars)
-        len = ucnv_toUChars(conv, source, len, text.c_str(), -1, &err);
-        target = new UChar[len + 1];
+	status = U_ZERO_ERROR;
+	Normalizer::normalize(source, UNORM_NFC, 0, target, status);
 
-        //canonical composition
-        unorm_normalize(source, len, UNORM_NFC, 0, target, len, &err);
+	status = U_ZERO_ERROR;
+	text.setSize(text.size()*2); // potentially, it can grow to 2x the original size
+	int32_t len = target.extract(text.getRawData(), text.size(), conv, status);
+	text.setSize(len);
 
-	   text.setSize(text.size()*2);
-	   len = ucnv_fromUChars(conv, text.getRawData(), text.size(), target, -1, &err);
-	   text.setSize(len);
-
-        delete [] source;
-        delete [] target;
-
 	return 0;
 }
 

Modified: trunk/utilities/osis2mod.cpp
===================================================================
--- trunk/utilities/osis2mod.cpp	2008-02-05 01:37:25 UTC (rev 2135)
+++ trunk/utilities/osis2mod.cpp	2008-02-24 09:57:46 UTC (rev 2136)
@@ -23,6 +23,10 @@
 #include <zipcomprs.h>
 #include <cipherfil.h>
 
+#ifdef _ICU_
+#include <utf8nfc.h>
+#endif
+
 //#define DEBUG
 
 // Debug for simple transformation stack
@@ -34,6 +38,10 @@
 
 using namespace std;
 
+#ifdef _ICU_
+UTF8NFC normalizer;
+#endif
+
 SWText *module = 0;
 VerseKey *currentVerse = 0;
 char activeOsisID[255];
@@ -50,11 +58,12 @@
 	"Jude", "Rev"};
 
 static bool inCanonicalOSISBook = true; // osisID is for a book that is not in Sword's canon
+static bool normalize = false; // Whether to normalize UTF-8 to NFC
 
 bool isOSISAbbrev(const char *buf) {
 	bool match = false;
 	for (int i = 0; i < 66; i++) {
-		if (!strcmp(buf, osisabbrevs[i])){
+		if (!strcmp(buf, osisabbrevs[i])) {
 			match = true;
 			break;
 		}
@@ -141,6 +150,12 @@
 				makeKJVRef(key);
 			}
 
+#ifdef _ICU_
+			if (normalize) {
+				normalizer.processText(activeVerseText, (SWKey *)2);  // note the hack of 2 to mimic a real key. TODO: remove all hacks
+			}
+#endif
+
 			SWBuf currentText = module->getRawEntry();
 			if (currentText.length()) {
 				cout << "Appending entry: " << key.getOSISRef() << ": " << activeVerseText << endl;
@@ -650,6 +665,8 @@
 	fprintf(stderr, "\t\t\t\t 2 - verse; 3 - chapter; 4 - book\n");
 	fprintf(stderr, "  -c <cipher_key>\t encipher module using supplied key\n");
 	fprintf(stderr, "\t\t\t\t (default no enciphering)\n");
+	fprintf(stderr, "  -n\t\t\t normalize UTF-8 to NFC (default is to leave text unmodified)\n");
+	fprintf(stderr, "\t\t\t\t Note: all UTF-8 texts should be normalized to NFC\n");
 	exit(-1);
 }
 
@@ -692,6 +709,13 @@
 			}
 			usage(*argv, "-b requires one of <2|3|4>");
 		}
+		else if (!strcmp(argv[i], "-n")) {
+			normalize = true;
+#ifndef _ICU_
+			normalize = false;
+			cout << program << " is not compiled with support for ICU. Ignoring -n flag." << endl;
+#endif
+		}
 		else if (!strcmp(argv[i], "-c")) {
 			if (i+1 < argc) cipherKey = argv[++i];
 			else usage(*argv, "-c requires <cipher_key>");
@@ -706,7 +730,7 @@
 	}
 
 #ifdef DEBUG
-	cout << "path: " << path << " osisDoc: " << osisDoc << " create: " << append << " compressType: " << compType << " blockType: " << iType << " cipherKey: " << cipherKey.c_str() << "\n";
+	cout << "path: " << path << " osisDoc: " << osisDoc << " create: " << append << " compressType: " << compType << " blockType: " << iType << " cipherKey: " << cipherKey.c_str() << " normalize: " << normalize << "\n";
 	cout << "";
 //	exit(-3);
 #endif
@@ -715,8 +739,8 @@
 	if (!append) {	// == 0 then create module
 	// Try to initialize a default set of datafiles and indicies at our
 	// datapath location passed to us from the user.
-		if ( compressor ){
-			if ( zText::createModule(path, iType) ){
+		if ( compressor ) {
+			if ( zText::createModule(path, iType) ) {
 				fprintf(stderr, "error: %s: couldn't create module at path: %s \n", program, path);
 				exit(-3);
 			}
@@ -735,7 +759,7 @@
 	}
 
 	// Do some initialization stuff
-	if (compressor){
+	if (compressor) {
 		module = new zText(path, 0, 0, iType, compressor);
 	}
 	else{
@@ -744,7 +768,7 @@
 
 	SWFilter *cipherFilter = 0;
 
-	if (!cipherKey.empty()){
+	if (!cipherKey.empty()) {
 		fprintf(stderr, "Adding cipher filter with phrase: %s\n", cipherKey.c_str() );
 		cipherFilter = new CipherFilter(cipherKey.c_str());
 		module->AddRawFilter(cipherFilter);




More information about the sword-cvs mailing list