[sword-svn] r2197 - trunk/utilities
dmsmith at www.crosswire.org
dmsmith at www.crosswire.org
Thu Sep 11 12:13:14 MST 2008
Author: dmsmith
Date: 2008-09-11 12:13:14 -0700 (Thu, 11 Sep 2008)
New Revision: 2197
Modified:
trunk/utilities/osis2mod.cpp
Log:
whitespace change
Modified: trunk/utilities/osis2mod.cpp
===================================================================
--- trunk/utilities/osis2mod.cpp 2008-09-11 18:49:08 UTC (rev 2196)
+++ trunk/utilities/osis2mod.cpp 2008-09-11 19:13:14 UTC (rev 2197)
@@ -68,19 +68,21 @@
std::vector<ListKey> linkedVerses;
-const char *osisabbrevs[] = {"Gen", "Exod", "Lev", "Num", "Deut", "Josh", "Judg",
- "Ruth", "1Sam", "2Sam", "1Kgs", "2Kgs", "1Chr", "2Chr", "Ezra", "Neh",
- "Esth", "Job", "Ps", "Prov", "Eccl", "Song", "Isa", "Jer", "Lam", "Ezek",
- "Dan", "Hos", "Joel", "Amos", "Obad", "Jonah", "Mic", "Nah", "Hab",
- "Zeph", "Hag", "Zech", "Mal",
+const char *osisabbrevs[] = {
+ "Gen", "Exod", "Lev", "Num", "Deut", "Josh", "Judg", "Ruth",
+ "1Sam", "2Sam", "1Kgs", "2Kgs", "1Chr", "2Chr", "Ezra", "Neh",
+ "Esth", "Job", "Ps", "Prov", "Eccl", "Song", "Isa", "Jer",
+ "Lam", "Ezek", "Dan", "Hos", "Joel", "Amos", "Obad", "Jonah",
+ "Mic", "Nah", "Hab", "Zeph", "Hag", "Zech", "Mal",
- "Matt", "Mark", "Luke", "John", "Acts", "Rom", "1Cor", "2Cor", "Gal",
- "Eph", "Phil", "Col", "1Thess", "2Thess", "1Tim", "2Tim", "Titus",
- "Phlm", "Heb", "Jas", "1Pet", "2Pet", "1John", "2John", "3John",
- "Jude", "Rev"};
+ "Matt", "Mark", "Luke", "John", "Acts", "Rom", "1Cor", "2Cor",
+ "Gal", "Eph", "Phil", "Col", "1Thess", "2Thess", "1Tim", "2Tim",
+ "Titus", "Phlm", "Heb", "Jas", "1Pet", "2Pet", "1John", "2John",
+ "3John", "Jude", "Rev"
+};
static bool inCanonicalOSISBook = true; // osisID is for a book that is not in Sword's canon
-static bool normalize = true; // Whether to normalize UTF-8 to NFC
+static bool normalize = true; // Whether to normalize UTF-8 to NFC
bool isOSISAbbrev(const char *buf) {
bool match = false;
@@ -119,51 +121,51 @@
* author DM Smith
*/
int detectUTF8(const char *txt) {
- unsigned int countUTF8 = 0;
- int count = 0;
-
- // Cast it to make masking and shifting easier
- const unsigned char *p = (const unsigned char*) txt;
- while (*p) {
- // Is the high order bit set?
- if (*p & 0x80) {
- // Then count the number of high order bits that are set.
- // This determines the number of following bytes
- // that are a part of the unicode character
- unsigned char i = *p;
- for (count = 0; i & 0x80; count++) {
- i <<= 1;
- }
+ unsigned int countUTF8 = 0;
+ int count = 0;
+
+ // Cast it to make masking and shifting easier
+ const unsigned char *p = (const unsigned char*) txt;
+ while (*p) {
+ // Is the high order bit set?
+ if (*p & 0x80) {
+ // Then count the number of high order bits that are set.
+ // This determines the number of following bytes
+ // that are a part of the unicode character
+ unsigned char i = *p;
+ for (count = 0; i & 0x80; count++) {
+ i <<= 1;
+ }
- // Validate count:
- // Count 0: bug in code that would cause core walking
- // Count 1: is a pattern of 10nnnnnn,
- // which does not signal the start of a unicode character
- // Count 5 to 8: 111110nn, 1111110n and 11111110 and 11111111
- // are not legal starts, either
- if (count < 2 || count > 4) return 0;
+ // Validate count:
+ // Count 0: bug in code that would cause core walking
+ // Count 1: is a pattern of 10nnnnnn,
+ // which does not signal the start of a unicode character
+ // Count 5 to 8: 111110nn, 1111110n and 11111110 and 11111111
+ // are not legal starts, either
+ if (count < 2 || count > 4) return 0;
- // At this point we expect (count - 1) following characters
- // of the pattern 10nnnnnn
- while (--count && *++p) {
- // The pattern of each following character must be: 10nnnnnn
- // So, compare the top 2 bits.
- if ((0xc0 & *p) != 0x80) return 0;
- }
+ // At this point we expect (count - 1) following characters
+ // of the pattern 10nnnnnn
+ while (--count && *++p) {
+ // The pattern of each following character must be: 10nnnnnn
+ // So, compare the top 2 bits.
+ if ((0xc0 & *p) != 0x80) return 0;
+ }
- // Oops, we've run out of bytes too soon: Cannot be UTF-8
- if (count) return 0;
+ // Oops, we've run out of bytes too soon: Cannot be UTF-8
+ if (count) return 0;
- // We have a valid UTF-8 character, so count it
- countUTF8++;
- }
+ // We have a valid UTF-8 character, so count it
+ countUTF8++;
+ }
- // Advance to the next character to examine.
- p++;
- }
-
- // At this point it is either UTF-8 or 7-bit ascii
- return countUTF8 ? 1 : -1;
+ // Advance to the next character to examine.
+ p++;
+ }
+
+ // At this point it is either UTF-8 or 7-bit ascii
+ return countUTF8 ? 1 : -1;
}
// This routine converts an osisID or osisRef into one that SWORD can parse into a verse list
@@ -369,96 +371,96 @@
return;
}
- strcpy(keyOsisID, currentVerse.getOSISRef());
+ strcpy(keyOsisID, currentVerse.getOSISRef());
- // set keyOsisID to anything that an osisID cannot be.
- if (force) {
- strcpy(keyOsisID, "-force");
- }
+ // set keyOsisID to anything that an osisID cannot be.
+ if (force) {
+ strcpy(keyOsisID, "-force");
+ }
- static VerseKey lastKey;
- lastKey.AutoNormalize(0);
- lastKey.Headings(1);
+ static VerseKey lastKey;
+ lastKey.AutoNormalize(0);
+ lastKey.Headings(1);
- VerseKey saveKey;
- saveKey.AutoNormalize(0);
- saveKey.Headings(1);
- saveKey = currentVerse;
+ VerseKey saveKey;
+ saveKey.AutoNormalize(0);
+ saveKey.Headings(1);
+ saveKey = currentVerse;
- // If we have seen a verse and the supplied one is different then we output the collected one.
- if (*activeOsisID && strcmp(activeOsisID, keyOsisID)) {
+ // If we have seen a verse and the supplied one is different then we output the collected one.
+ if (*activeOsisID && strcmp(activeOsisID, keyOsisID)) {
- currentVerse = lastKey;
+ currentVerse = lastKey;
- if (!isKJVRef(currentVerse)) {
- makeKJVRef(currentVerse);
- }
+ if (!isKJVRef(currentVerse)) {
+ makeKJVRef(currentVerse);
+ }
#ifdef _ICU_
- int utf8State = detectUTF8(activeVerseText.c_str());
- if (normalize) {
- // Don't need to normalize text that is ASCII
- // But assume other non-UTF-8 text is Latin1 (cp1252) and convert it to UTF-8
- if (!utf8State) {
- cout << "Warning: " << activeOsisID << ": Converting to UTF-8 (" << activeVerseText << ")" << endl;
- converter.processText(activeVerseText, (SWKey *)2); // note the hack of 2 to mimic a real key. TODO: remove all hacks
- converted++;
+ int utf8State = detectUTF8(activeVerseText.c_str());
+ if (normalize) {
+ // Don't need to normalize text that is ASCII
+ // But assume other non-UTF-8 text is Latin1 (cp1252) and convert it to UTF-8
+ if (!utf8State) {
+ cout << "Warning: " << activeOsisID << ": Converting to UTF-8 (" << activeVerseText << ")" << endl;
+ converter.processText(activeVerseText, (SWKey *)2); // note the hack of 2 to mimic a real key. TODO: remove all hacks
+ converted++;
- // Prepare for double check. This probably can be removed.
- // But for now we are running the check again.
- // This is to determine whether we need to normalize output of the conversion.
- utf8State = detectUTF8(activeVerseText.c_str());
- }
+ // Prepare for double check. This probably can be removed.
+ // But for now we are running the check again.
+ // This is to determine whether we need to normalize output of the conversion.
+ utf8State = detectUTF8(activeVerseText.c_str());
+ }
- // Double check. This probably can be removed.
- if (!utf8State) {
- cout << "Error: " << activeOsisID << ": Converting to UTF-8 (" << activeVerseText << ")" << endl;
- }
+ // Double check. This probably can be removed.
+ if (!utf8State) {
+ cout << "Error: " << activeOsisID << ": Converting to UTF-8 (" << activeVerseText << ")" << endl;
+ }
- if (utf8State > 0) {
- SWBuf before = activeVerseText;
- normalizer.processText(activeVerseText, (SWKey *)2); // note the hack of 2 to mimic a real key. TODO: remove all hacks
- if (before != activeVerseText) {
- normalized++;
- }
+ if (utf8State > 0) {
+ SWBuf before = activeVerseText;
+ normalizer.processText(activeVerseText, (SWKey *)2); // note the hack of 2 to mimic a real key. TODO: remove all hacks
+ if (before != activeVerseText) {
+ normalized++;
}
}
+ }
#endif
- // If the entry already exists, then append this entry to the text.
- // This is for verses that are outside the KJV versification. They are appended to the prior verse.
- // The space should not be needed if we retained verse tags.
- SWBuf currentText = module->getRawEntry();
- if (currentText.length()) {
- cout << "Appending entry: " << currentVerse.getOSISRef() << ": " << activeVerseText << endl;
- activeVerseText = currentText + " " + activeVerseText;
- }
+ // If the entry already exists, then append this entry to the text.
+ // This is for verses that are outside the KJV versification. They are appended to the prior verse.
+ // The space should not be needed if we retained verse tags.
+ SWBuf currentText = module->getRawEntry();
+ if (currentText.length()) {
+ cout << "Appending entry: " << currentVerse.getOSISRef() << ": " << activeVerseText << endl;
+ activeVerseText = currentText + " " + activeVerseText;
+ }
#ifdef DEBUG
- cout << "Write: " << activeOsisID << ":" << currentVerse.getOSISRef() << ": " << activeVerseText << endl;
+ cout << "Write: " << activeOsisID << ":" << currentVerse.getOSISRef() << ": " << activeVerseText << endl;
#endif
- module->setEntry(activeVerseText);
- activeVerseText = "";
- }
+ module->setEntry(activeVerseText);
+ activeVerseText = "";
+ }
- // The following is for initial verse content and for appending interverse content.
- // Eliminate leading whitespace on the beginning of each verse and
- // before we append to current content, since we just added one
- text.trimStart();
- if (activeVerseText.length()) {
- activeVerseText += " ";
- activeVerseText += text;
- }
- else {
- activeVerseText = text;
- }
- // text has been consumed so clear it out.
- text = "";
+ // The following is for initial verse content and for appending interverse content.
+ // Eliminate leading whitespace on the beginning of each verse and
+ // before we append to current content, since we just added one
+ text.trimStart();
+ if (activeVerseText.length()) {
+ activeVerseText += " ";
+ activeVerseText += text;
+ }
+ else {
+ activeVerseText = text;
+ }
+ // text has been consumed so clear it out.
+ text = "";
- currentVerse = saveKey;
- lastKey = currentVerse;
- strcpy(activeOsisID, keyOsisID);
+ currentVerse = saveKey;
+ lastKey = currentVerse;
+ strcpy(activeOsisID, keyOsisID);
}
@@ -486,28 +488,28 @@
bool handleToken(SWBuf &text, XMLTag token) {
// Everything between the begin book tag and the first begin chapter tag is inBookHeader
- static bool inBookHeader = false;
+ static bool inBookHeader = false;
// Everything between the begin chapter tag and the first begin verse tag is inChapterHeader
- static bool inChapterHeader = false;
+ static bool inChapterHeader = false;
// Flags indicating whether we are processing the content of a verse
- static bool inVerse = false;
+ static bool inVerse = false;
// Used to remember titles that need to be handled specially
- static SWBuf header = "";
- static SWBuf lastTitle = "";
- static int titleOffset = -1;
- static bool inTitle = false;
- static int titleDepth = 0;
+ static SWBuf header = "";
+ static SWBuf lastTitle = "";
+ static int titleOffset = -1;
+ static bool inTitle = false;
+ static int titleDepth = 0;
// Flag indicating whether we are in "Words of Christ"
- static bool inWOC = false;
+ static bool inWOC = false;
// Tag for WOC quotes within a verse
- static XMLTag wocTag = "<q who=\"Jesus\" marker=\"\">";
+ static XMLTag wocTag = "<q who=\"Jesus\" marker=\"\">";
// Flag used to indicate where useful text begins
- static bool firstDiv = false;
+ static bool firstDiv = false;
// Stack of quote elements used to handle Words of Christ
static std::stack<XMLTag> quoteStack;
@@ -520,14 +522,14 @@
static std::stack<XMLTag> tagStack;
// The following are used to validate well-formedness
- static int chapterDepth = 0;
- static int bookDepth = 0;
- static int verseDepth = 0;
+ static int chapterDepth = 0;
+ static int bookDepth = 0;
+ static int verseDepth = 0;
- int tagDepth = tagStack.size();
- const char *tokenName = token.getName();
- bool isEndTag = token.isEndTag() || token.getAttribute("eID");
- const char *typeAttr = token.getAttribute("type");
+ int tagDepth = tagStack.size();
+ const char *tokenName = token.getName();
+ bool isEndTag = token.isEndTag() || token.getAttribute("eID");
+ const char *typeAttr = token.getAttribute("type");
//Titles are treated specially.
// If the title has an attribute type of "main" or "chapter"
@@ -740,7 +742,7 @@
// Otherwise have to do it here
if (!strcmp(tokenName, "q")) {
- quoteStack.push(token);
+ quoteStack.push(token);
#ifdef DEBUG_QUOTE
cout << currentOsisID << ": quote top(" << quoteStack.size() << ") " << token << endl;
#endif
@@ -878,7 +880,7 @@
// and we need to terminate the <q who="Jesus" marker=""> that was added earlier in the verse.
if (token.getAttribute("who") && !strcmp(token.getAttribute("who"), "Jesus")) {
#ifdef DEBUG_QUOTE
- cout << currentOsisID << ": (" << quoteStack.size() << ") " << topToken << " -- " << token << endl;
+ cout << currentOsisID << ": (" << quoteStack.size() << ") " << topToken << " -- " << token << endl;
#endif
inWOC = false;
const char *sID = topToken.getAttribute("sID");
@@ -1004,8 +1006,7 @@
// abbr When would this ever cross a boundary?
// seg as it is used for a divineName hack
// foreign so that it can be easily italicized
- else if (
- !strcmp(tagName, "chapter") ||
+ else if (!strcmp(tagName, "chapter") ||
!strcmp(tagName, "closer") ||
!strcmp(tagName, "div") ||
!strcmp(tagName, "l") ||
@@ -1018,7 +1019,7 @@
sprintf(buf, "gen%d", sID++);
t.setAttribute("sID", buf);
}
- bspTagStack.push(t);
+ bspTagStack.push(t);
#ifdef DEBUG_XFORM
cout << currentOsisID << ": xform push (" << bspTagStack.size() << ") " << t << " (tagname=" << tagName << ")" << endl;
XMLTag topToken = bspTagStack.top();
@@ -1047,8 +1048,7 @@
}
// Look for the milestoneable container tags handled above.
- else if (
- !strcmp(tagName, "chapter") ||
+ else if (!strcmp(tagName, "chapter") ||
!strcmp(tagName, "closer") ||
!strcmp(tagName, "div") ||
!strcmp(tagName, "l") ||
More information about the sword-cvs
mailing list