[sword-svn] r2693 - trunk/utilities

dmsmith at crosswire.org dmsmith at crosswire.org
Sat Mar 24 15:38:01 MST 2012


Author: dmsmith
Date: 2012-03-24 15:38:01 -0700 (Sat, 24 Mar 2012)
New Revision: 2693

Modified:
   trunk/utilities/osis2mod.cpp
Log:
Allow comments to be in osis document and to be stripped out. From John Zaitseff

Modified: trunk/utilities/osis2mod.cpp
===================================================================
--- trunk/utilities/osis2mod.cpp	2012-03-24 22:21:11 UTC (rev 2692)
+++ trunk/utilities/osis2mod.cpp	2012-03-24 22:38:01 UTC (rev 2693)
@@ -1328,6 +1328,16 @@
 }
 
 void processOSIS(istream& infile) {
+	typedef enum {
+		CS_NOT_IN_COMMENT,		// or seen starting "<"
+		CS_SEEN_STARTING_EXCLAMATION,
+		CS_SEEN_STARTING_HYPHEN,
+		CS_IN_COMMENT,
+		CS_SEEN_ENDING_HYPHEN,
+		CS_SEEN_SECOND_ENDING_HYPHEN,
+		CS_SEEN_ENDING_GREATER_THAN
+	} t_commentstate;
+
 	activeOsisID[0] = '\0';
 
 	strcpy(currentOsisID,"N/A");
@@ -1342,6 +1352,8 @@
 
 	SWBuf token;
 	SWBuf text;
+	bool incomment = false;
+	t_commentstate commentstate = CS_NOT_IN_COMMENT;
 	bool intoken = false;
 	bool inWhitespace = false;
 	bool seeingSpace = false;
@@ -1364,6 +1376,95 @@
 			continue;
 		}
 
+		// Handle XML comments starting with "<!--", ending with "-->"
+
+		if (intoken && !incomment) {
+			switch (commentstate) {
+				case CS_NOT_IN_COMMENT :
+					if (curChar == '!') {
+						commentstate = CS_SEEN_STARTING_EXCLAMATION;
+						token.append((char) curChar);
+						continue;
+					} else {
+						break;
+					}
+
+				case CS_SEEN_STARTING_EXCLAMATION :
+					if (curChar == '-') {
+						commentstate = CS_SEEN_STARTING_HYPHEN;
+						token.append((char) curChar);
+						continue;
+					} else {
+						commentstate = CS_NOT_IN_COMMENT;
+						break;
+					}
+
+				case CS_SEEN_STARTING_HYPHEN :
+					if (curChar == '-') {
+						incomment = true;
+						commentstate = CS_IN_COMMENT;
+						token.append((char) curChar);
+
+						if (debug & DEBUG_OTHER) {
+							cout << "DEBUG(COMMENTS): in comment" << endl;
+						}
+
+						continue;
+					} else {
+						commentstate = CS_NOT_IN_COMMENT;
+						break;
+					}
+
+				default:
+					cout << "FATAL(COMMENTS): unknown commentstate on comment start: " << commentstate << endl;
+					exit(EXIT_BAD_NESTING);
+			}
+		}
+
+		if (incomment) {
+			switch (commentstate) {
+				case CS_IN_COMMENT:
+					if (curChar == '-') {
+						commentstate = CS_SEEN_ENDING_HYPHEN;
+						continue;
+					} else {
+						// ignore the character
+						continue;
+					}
+
+				case CS_SEEN_ENDING_HYPHEN :
+					if (curChar == '-') {
+						commentstate = CS_SEEN_SECOND_ENDING_HYPHEN;
+						continue;
+					} else {
+						// ignore character
+						commentstate = CS_IN_COMMENT;
+						continue;
+					}
+
+				case CS_SEEN_SECOND_ENDING_HYPHEN :
+					if (curChar == '>') {
+						intoken = false;
+						incomment = false;
+						commentstate = CS_NOT_IN_COMMENT;
+
+						if (debug & DEBUG_OTHER) {
+							cout << "DEBUG(COMMENTS): out of comment" << endl;
+						}
+
+						continue;
+					} else {
+						// ignore character
+						commentstate = CS_IN_COMMENT;
+						continue;
+					}
+
+				default:
+					cout << "FATAL(COMMENTS): unknown commentstate on comment end: " << commentstate << endl;
+					exit(EXIT_BAD_NESTING);
+			}
+		}
+
 		// Outside of tokens merge adjacent whitespace
 		if (!intoken) {
 			seeingSpace = isspace(curChar)!=0;
@@ -1382,25 +1483,28 @@
 			inWhitespace = false;
 			token.append('>');
 			// take this isalpha if out to check for bugs in text
-			if ((isalpha(token[1])) || (isalpha(token[2]))) {
+			if (isalpha(token[1]) ||
+			    (((token[1] == '/') || (token[1] == '?')) && isalpha(token[2]))) {
 				//cout << "Handle:" << token.c_str() << endl;
 				XMLTag t = transformBSP(token.c_str());
 
 				if (!handleToken(text, t)) {
 					text.append(t);
 				}
+			} else {
+				cout << "WARNING(PARSE): malformed token: " << token << endl;
 			}
 			continue;
 		}
 
 		if (intoken) {
-			token.append((char)curChar);
+			token.append((char) curChar);
 		}
 		else {
 			switch (curChar) {
 				case '>' : text.append(">"); break;
 				case '<' : text.append("<"); break;
-				default  : text.append((char)curChar); break;
+				default  : text.append((char) curChar); break;
 			}
 		}
 	}




More information about the sword-cvs mailing list