[sword-svn] r2049 - trunk/utilities

dmsmith at www.crosswire.org dmsmith at www.crosswire.org
Sun May 13 11:51:30 MST 2007


Author: dmsmith
Date: 2007-05-13 11:51:29 -0700 (Sun, 13 May 2007)
New Revision: 2049

Modified:
   trunk/utilities/osis2mod.cpp
Log:
changed osis2mod's handling of whitespace in input by not calling FileMgr::getLine, but rather using std::ifstream::get instead and changing all whitespace tokens into blanks and merging adjacent spaces into one. Leading whitespace on each verse is still removed.

Modified: trunk/utilities/osis2mod.cpp
===================================================================
--- trunk/utilities/osis2mod.cpp	2007-05-12 02:17:51 UTC (rev 2048)
+++ trunk/utilities/osis2mod.cpp	2007-05-13 18:51:29 UTC (rev 2049)
@@ -6,9 +6,9 @@
 #include <string>
 #include <stack>
 #include <iostream>
+#include <fstream>
 
 #include <utilstr.h>
-#include <filemgr.h>
 #include <swmgr.h>
 #include <rawtext.h>
 #include <swbuf.h>
@@ -155,6 +155,9 @@
 			activeVerseText = "";
 		}
 
+		// eliminate leading whitespace on the beginning of each verse and
+		// before we append to current content, since we just added one
+		text.trimStart();
 		if (activeVerseText.length()) {
 			activeVerseText += " ";
 			activeVerseText += text;
@@ -695,15 +698,13 @@
 	}
 
 	// Let's see if we can open our input file
-	FileDesc *fd = FileMgr::getSystemFileMgr()->open(osisDoc, FileMgr::RDONLY);
-	if (fd->getFd() < 0) {
+	ifstream infile(osisDoc);
+	if (infile.fail()) {
 		fprintf(stderr, "error: %s: couldn't open input file: %s \n", program, osisDoc);
 		exit(-2);
 	}
 
 	// Do some initialization stuff
-	SWBuf buffer;
-
 	if (compressor){
 		module = new zText(path, 0, 0, iType, compressor);
 	}
@@ -736,22 +737,44 @@
 
 	(*module) = TOP;
 
-	const char *from;
 	SWBuf token;
 	SWBuf text;
 	bool intoken = false;
+	bool inWhitespace = false;
+	bool seeingSpace = false;
+	char curChar = '\0';
 
-	while (FileMgr::getLine(fd, buffer)) {
-		//cout << "Line: " << buffer.c_str() << endl;
-		for (from = buffer.c_str(); *from; from++) {
-			if (!intoken && *from == '<') {
+	while (infile.good()) {
+		
+		curChar = infile.get();
+
+		// skip the character if it is bad. infile.good() will catch the problem
+		if (curChar == -1) {
+			continue;
+		}
+
+			if (!intoken && curChar == '<') {
 				intoken = true;
 				token = "<";
 				continue;
 			}
 
-			if (intoken && *from == '>') {
+			// Outside of tokens merge adjacent whitespace
+			if (!intoken) {
+				seeingSpace = isspace(curChar);
+				if (seeingSpace) {
+					if (inWhitespace) {
+						continue;
+					}
+					// convert all whitespace to blanks
+					curChar = ' ';
+				}
+				inWhitespace = seeingSpace;
+			}
+
+			if (intoken && curChar == '>') {
 				intoken = false;
+				inWhitespace = false;
 				token.append('>');
 				// take this isalpha if out to check for bugs in text
 				if ((isalpha(token[1])) || (isalpha(token[2]))) {
@@ -766,27 +789,22 @@
 			}
 
 			if (intoken)
-				token.append(*from);
+				token.append(curChar);
 			else
-				switch (*from) {
+				switch (curChar) {
 					case '>' : text.append("&gt;"); break;
 					case '<' : text.append("&lt;"); break;
-					default  : text.append(*from); break;
+					default  : text.append(curChar); break;
 				}
-				
-		}
-
-		if (intoken)
-			token.append("\n");
 	}
 
-	// Force the last entry from the buffer.
+	// Force the last entry from the text buffer.
 	text = "";
 	writeEntry(*currentVerse, text, true);
 	delete module;
 	delete currentVerse;
 	if (cipherFilter)
 		delete cipherFilter;
-	FileMgr::getSystemFileMgr()->close(fd);
+	infile.close();
 }
 




More information about the sword-cvs mailing list