[sword-svn] r2136 - in trunk: src/modules/filters utilities
chrislit at www.crosswire.org
chrislit at www.crosswire.org
Sun Feb 24 02:57:46 MST 2008
Author: chrislit
Date: 2008-02-24 02:57:46 -0700 (Sun, 24 Feb 2008)
New Revision: 2136
Modified:
trunk/src/modules/filters/utf8nfc.cpp
trunk/utilities/osis2mod.cpp
Log:
Committed NFC patches from DM. (We can use this as a basis if further tweaking is necessary.)
Modified: trunk/src/modules/filters/utf8nfc.cpp
===================================================================
--- trunk/src/modules/filters/utf8nfc.cpp 2008-02-05 01:37:25 UTC (rev 2135)
+++ trunk/src/modules/filters/utf8nfc.cpp 2008-02-24 09:57:46 UTC (rev 2136)
@@ -9,6 +9,9 @@
#include <stdlib.h>
#include <utilstr.h>
+#include <unicode/unistr.h>
+#include <unicode/normlzr.h>
+#include <unicode/unorm.h>
#include <utf8nfc.h>
#include <swbuf.h>
@@ -25,26 +28,21 @@
char UTF8NFC::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
- if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
return -1;
- int32_t len = text.length() * 2;
- source = new UChar[len + 1]; //each char could become a surrogate pair
+ UErrorCode status = U_ZERO_ERROR;
+ UnicodeString source(text.getRawData(), text.length(), conv, status);
+ UnicodeString target;
- // Convert UTF-8 string to UTF-16 (UChars)
- len = ucnv_toUChars(conv, source, len, text.c_str(), -1, &err);
- target = new UChar[len + 1];
+ status = U_ZERO_ERROR;
+ Normalizer::normalize(source, UNORM_NFC, 0, target, status);
- //canonical composition
- unorm_normalize(source, len, UNORM_NFC, 0, target, len, &err);
+ status = U_ZERO_ERROR;
+ text.setSize(text.size()*2); // potentially, it can grow to 2x the original size
+ int32_t len = target.extract(text.getRawData(), text.size(), conv, status);
+ text.setSize(len);
- text.setSize(text.size()*2);
- len = ucnv_fromUChars(conv, text.getRawData(), text.size(), target, -1, &err);
- text.setSize(len);
-
- delete [] source;
- delete [] target;
-
return 0;
}
Modified: trunk/utilities/osis2mod.cpp
===================================================================
--- trunk/utilities/osis2mod.cpp 2008-02-05 01:37:25 UTC (rev 2135)
+++ trunk/utilities/osis2mod.cpp 2008-02-24 09:57:46 UTC (rev 2136)
@@ -23,6 +23,10 @@
#include <zipcomprs.h>
#include <cipherfil.h>
+#ifdef _ICU_
+#include <utf8nfc.h>
+#endif
+
//#define DEBUG
// Debug for simple transformation stack
@@ -34,6 +38,10 @@
using namespace std;
+#ifdef _ICU_
+UTF8NFC normalizer;
+#endif
+
SWText *module = 0;
VerseKey *currentVerse = 0;
char activeOsisID[255];
@@ -50,11 +58,12 @@
"Jude", "Rev"};
static bool inCanonicalOSISBook = true; // osisID is for a book that is not in Sword's canon
+static bool normalize = false; // Whether to normalize UTF-8 to NFC
bool isOSISAbbrev(const char *buf) {
bool match = false;
for (int i = 0; i < 66; i++) {
- if (!strcmp(buf, osisabbrevs[i])){
+ if (!strcmp(buf, osisabbrevs[i])) {
match = true;
break;
}
@@ -141,6 +150,12 @@
makeKJVRef(key);
}
+#ifdef _ICU_
+ if (normalize) {
+ normalizer.processText(activeVerseText, (SWKey *)2); // note the hack of 2 to mimic a real key. TODO: remove all hacks
+ }
+#endif
+
SWBuf currentText = module->getRawEntry();
if (currentText.length()) {
cout << "Appending entry: " << key.getOSISRef() << ": " << activeVerseText << endl;
@@ -650,6 +665,8 @@
fprintf(stderr, "\t\t\t\t 2 - verse; 3 - chapter; 4 - book\n");
fprintf(stderr, " -c <cipher_key>\t encipher module using supplied key\n");
fprintf(stderr, "\t\t\t\t (default no enciphering)\n");
+ fprintf(stderr, " -n\t\t\t normalize UTF-8 to NFC (default is to leave text unmodified)\n");
+ fprintf(stderr, "\t\t\t\t Note: all UTF-8 texts should be normalized to NFC\n");
exit(-1);
}
@@ -692,6 +709,13 @@
}
usage(*argv, "-b requires one of <2|3|4>");
}
+ else if (!strcmp(argv[i], "-n")) {
+ normalize = true;
+#ifndef _ICU_
+ normalize = false;
+ cout << program << " is not compiled with support for ICU. Ignoring -n flag." << endl;
+#endif
+ }
else if (!strcmp(argv[i], "-c")) {
if (i+1 < argc) cipherKey = argv[++i];
else usage(*argv, "-c requires <cipher_key>");
@@ -706,7 +730,7 @@
}
#ifdef DEBUG
- cout << "path: " << path << " osisDoc: " << osisDoc << " create: " << append << " compressType: " << compType << " blockType: " << iType << " cipherKey: " << cipherKey.c_str() << "\n";
+ cout << "path: " << path << " osisDoc: " << osisDoc << " create: " << append << " compressType: " << compType << " blockType: " << iType << " cipherKey: " << cipherKey.c_str() << " normalize: " << normalize << "\n";
cout << "";
// exit(-3);
#endif
@@ -715,8 +739,8 @@
if (!append) { // == 0 then create module
// Try to initialize a default set of datafiles and indicies at our
// datapath location passed to us from the user.
- if ( compressor ){
- if ( zText::createModule(path, iType) ){
+ if ( compressor ) {
+ if ( zText::createModule(path, iType) ) {
fprintf(stderr, "error: %s: couldn't create module at path: %s \n", program, path);
exit(-3);
}
@@ -735,7 +759,7 @@
}
// Do some initialization stuff
- if (compressor){
+ if (compressor) {
module = new zText(path, 0, 0, iType, compressor);
}
else{
@@ -744,7 +768,7 @@
SWFilter *cipherFilter = 0;
- if (!cipherKey.empty()){
+ if (!cipherKey.empty()) {
fprintf(stderr, "Adding cipher filter with phrase: %s\n", cipherKey.c_str() );
cipherFilter = new CipherFilter(cipherKey.c_str());
module->AddRawFilter(cipherFilter);
More information about the sword-cvs
mailing list