[sword-svn] r2354 - trunk/utilities
dmsmith at crosswire.org
dmsmith at crosswire.org
Mon Apr 27 10:52:10 MST 2009
Author: dmsmith
Date: 2009-04-27 10:52:10 -0700 (Mon, 27 Apr 2009)
New Revision: 2354
Modified:
trunk/utilities/osis2mod.cpp
Log:
in osis2mod pulled the utf8 code up into its own method
Modified: trunk/utilities/osis2mod.cpp
===================================================================
--- trunk/utilities/osis2mod.cpp 2009-04-27 16:19:30 UTC (rev 2353)
+++ trunk/utilities/osis2mod.cpp 2009-04-27 17:52:10 UTC (rev 2354)
@@ -75,11 +75,10 @@
#ifdef _ICU_
UTF8NFC normalizer;
+Latin1UTF8 converter;
+#endif
int normalized = 0;
-
-Latin1UTF8 converter;
int converted = 0;
-#endif
SWText *module = 0;
VerseKey currentVerse;
@@ -174,6 +173,47 @@
return countUTF8 ? 1 : -1;
}
+// Checks the encoding of an entry's text and, in ICU builds with normalization
+// enabled, converts non-UTF-8 text to UTF-8 and then normalizes it.
+//
+// key  - identifier of the entry; used only in the diagnostic messages.
+// text - the entry text; may be rewritten in place by the converter and/or
+//        the normalizer.
+//
+// Side effects: writes warnings/errors to cout and bumps the file-level
+// `converted` and `normalized` counters.
+//
+// detectUTF8() results as used here: 0 means the text is not valid UTF-8,
+// > 0 means UTF-8 that is worth normalizing. Any other value is left alone
+// (presumably plain ASCII -- confirm against detectUTF8()).
+void prepareSWText(const char *key, SWBuf &text)
+{
+	// Always check on UTF8 and report on non-UTF8 entries
+	int utf8State = detectUTF8(text.c_str());
+
+	// Trust, but verify.
+	if (!normalize && !utf8State) {
+		cout << "Warning: " << key << ": Should be converted to UTF-8 (" << text << ")" << endl;
+	}
+
+#ifdef _ICU_
+	if (normalize) {
+		// Don't need to normalize text that is ASCII
+		// But assume other non-UTF-8 text is Latin1 (cp1252) and convert it to UTF-8
+		if (!utf8State) {
+			cout << "Warning: " << key << ": Converting to UTF-8 (" << text << ")" << endl;
+			converter.processText(text, (SWKey *)2); // note the hack of 2 to mimic a real key. TODO: remove all hacks
+			converted++;
+
+			// Prepare for double check. This probably can be removed.
+			// But for now we are running the check again.
+			// This is to determine whether we need to normalize output of the conversion.
+			utf8State = detectUTF8(text.c_str());
+		}
+
+		// Double check. This probably can be removed.
+		if (!utf8State) {
+			cout << "Error: " << key << ": Converting to UTF-8 (" << text << ")" << endl;
+		}
+
+		if (utf8State > 0) {
+			SWBuf before = text;
+			normalizer.processText(text, (SWKey *)2); // note the hack of 2 to mimic a real key. TODO: remove all hacks
+			// Compare against the buffer that was just normalized (the parameter),
+			// not a variable from outside this function, so the count is correct
+			// no matter which buffer the caller passes in.
+			if (before != text) {
+				normalized++;
+			}
+		}
+	}
+#endif
+}
+
// This routine converts an osisID or osisRef into one that SWORD can parse into a verse list
// An osisRef is made up of:
// a single osisID
@@ -440,37 +480,8 @@
currentVerse = lastKey;
-#ifdef _ICU_
- int utf8State = detectUTF8(activeVerseText.c_str());
- if (normalize) {
- // Don't need to normalize text that is ASCII
- // But assume other non-UTF-8 text is Latin1 (cp1252) and convert it to UTF-8
- if (!utf8State) {
- cout << "Warning: " << activeOsisID << ": Converting to UTF-8 (" << activeVerseText << ")" << endl;
- converter.processText(activeVerseText, (SWKey *)2); // note the hack of 2 to mimic a real key. TODO: remove all hacks
- converted++;
+ prepareSWText(activeOsisID, activeVerseText);
- // Prepare for double check. This probably can be removed.
- // But for now we are running the check again.
- // This is to determine whether we need to normalize output of the conversion.
- utf8State = detectUTF8(activeVerseText.c_str());
- }
-
- // Double check. This probably can be removed.
- if (!utf8State) {
- cout << "Error: " << activeOsisID << ": Converting to UTF-8 (" << activeVerseText << ")" << endl;
- }
-
- if (utf8State > 0) {
- SWBuf before = activeVerseText;
- normalizer.processText(activeVerseText, (SWKey *)2); // note the hack of 2 to mimic a real key. TODO: remove all hacks
- if (before != activeVerseText) {
- normalized++;
- }
- }
- }
-#endif
-
// Put the revision into the module
int testmt = currentVerse.Testament();
if ((testmt == 1 && firstOT) || (testmt == 2 && firstNT)) {
More information about the sword-cvs
mailing list