[sword-svn] r2354 - trunk/utilities

dmsmith at crosswire.org dmsmith at crosswire.org
Mon Apr 27 10:52:10 MST 2009


Author: dmsmith
Date: 2009-04-27 10:52:10 -0700 (Mon, 27 Apr 2009)
New Revision: 2354

Modified:
   trunk/utilities/osis2mod.cpp
Log:
in osis2mod pulled the utf8 code up into its own method

Modified: trunk/utilities/osis2mod.cpp
===================================================================
--- trunk/utilities/osis2mod.cpp	2009-04-27 16:19:30 UTC (rev 2353)
+++ trunk/utilities/osis2mod.cpp	2009-04-27 17:52:10 UTC (rev 2354)
@@ -75,11 +75,10 @@
 
 #ifdef _ICU_
 UTF8NFC normalizer;
+Latin1UTF8 converter;
+#endif
 int normalized = 0;
-
-Latin1UTF8 converter;
 int converted = 0;
-#endif
 
 SWText *module = 0;
 VerseKey currentVerse;
@@ -174,6 +173,47 @@
 	return countUTF8 ? 1 : -1;
 }
 
+// Reports entry text that detectUTF8() flags as non-UTF-8 and, when _ICU_ is defined and
+// normalize is set, converts such text (assumed Latin1) and runs UTF-8 text through the normalizer.
+// key labels each diagnostic line; text is the entry buffer and may be rewritten in place.
+void prepareSWText(const char *key, SWBuf &text)
+{
+	// Always check on UTF8 and report on non-UTF8 entries
+	int utf8State = detectUTF8(text.c_str());
+
+	// Trust, but verify.
+	if (!normalize && !utf8State) {
+		cout << "Warning: " << key << ": Should be converted to UTF-8 (" << text << ")" << endl;
+	}
+
+#ifdef _ICU_
+	if (normalize) {
+		// Don't need to normalize text that is ASCII
+		// But assume other non-UTF-8 text is Latin1 (cp1252) and convert it to UTF-8
+		if (!utf8State) {
+			cout << "Warning: " << key << ": Converting to UTF-8 (" << text << ")" << endl;
+			converter.processText(text, (SWKey *)2);  // note the hack of 2 to mimic a real key. TODO: remove all hacks
+			converted++;
+			// Re-run detection so the (probably removable) double check and the
+			// normalization step below act on the state of the converted text.
+			utf8State = detectUTF8(text.c_str());
+		}
+		// Double check. This probably can be removed.
+		if (!utf8State) {
+			cout << "Error: " << key << ": Converting to UTF-8 (" << text << ")" << endl;
+		}
+		if (utf8State > 0) {
+			SWBuf before = text;
+			normalizer.processText(text, (SWKey *)2);  // note the hack of 2 to mimic a real key. TODO: remove all hacks
+			// Count the entry only when normalization actually changed this buffer.
+			if (before != text) {
+				normalized++;
+			}
+		}
+	}
+#endif
+}
+
 // This routine converts an osisID or osisRef into one that SWORD can parse into a verse list
 // An osisRef is made up of:
 // a single osisID
@@ -440,37 +480,8 @@
 
 		currentVerse = lastKey;
 
-#ifdef _ICU_
-		int utf8State = detectUTF8(activeVerseText.c_str());
-		if (normalize) {
-			// Don't need to normalize text that is ASCII
-			// But assume other non-UTF-8 text is Latin1 (cp1252) and convert it to UTF-8
-			if (!utf8State) {
-				cout << "Warning: " << activeOsisID << ": Converting to UTF-8 (" << activeVerseText << ")" << endl;
-				converter.processText(activeVerseText, (SWKey *)2);  // note the hack of 2 to mimic a real key. TODO: remove all hacks
-				converted++;
+		prepareSWText(activeOsisID, activeVerseText);
 
-				// Prepare for double check. This probably can be removed.
-				// But for now we are running the check again.
-				// This is to determine whether we need to normalize output of the conversion.
-				utf8State = detectUTF8(activeVerseText.c_str());
-			}
-
-			// Double check. This probably can be removed.
-			if (!utf8State) {
-				cout << "Error: " << activeOsisID << ": Converting to UTF-8 (" << activeVerseText << ")" << endl;
-			}
-
-			if (utf8State > 0) {
-				SWBuf before = activeVerseText;
-				normalizer.processText(activeVerseText, (SWKey *)2);  // note the hack of 2 to mimic a real key. TODO: remove all hacks
-				if (before != activeVerseText) {
-					normalized++;
-				}
-			}
-		}
-#endif
-
 		// Put the revision into the module
 		int testmt = currentVerse.Testament();
 		if ((testmt == 1 && firstOT) || (testmt == 2 && firstNT)) {




More information about the sword-cvs mailing list