[sword-svn] r279 - trunk/modules/nasb/cutil

Thu Jun 3 12:02:27 MST 2010

Author: scribe
Date: 2010-06-03 12:02:27 -0700 (Thu, 03 Jun 2010)
New Revision: 279

Modified:
   trunk/modules/nasb/cutil/nasbosis.cpp
Log:
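divine name fixes: wrap <divineName> in <seg>, and repair misordered </w></divineName></seg> closings in a cleanup pass
~ fixes: convert ~“ and ~‘ into <milestone type="cQuote"> markers
other oddities: e.g. strongsStart is now set to i+1 after a token insert, and "seg" no longer resets intag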


Modified: trunk/modules/nasb/cutil/nasbosis.cpp
===================================================================

--- trunk/modules/nasb/cutil/nasbosis.cpp	2010-06-02 11:17:47 UTC (rev 278)
+++ trunk/modules/nasb/cutil/nasbosis.cpp	2010-06-03 19:02:27 UTC (rev 279)
@@ -572,10 +572,16 @@
 				if ((intag)&&(!strncmp(lastToken.c_str(), "transChange", 11))) {
 					transChangeStart = i+1;
 				}
+/*
+				if (!strncmp(lastToken.c_str(), "seg", 3)) {
+					strongsFound = false;
+					strongsStart = i+1;
+				}
 				if (!strncmp(lastToken.c_str(), "divineName", 10)) {
 					strongsFound = false;
 					strongsStart = i+1;
 				}
+*/
 				if (!strncmp(lastToken.c_str(), "/divineName", 10)) {
 					strongsFound = false;
 					strongsStart = i+1;
@@ -610,16 +616,19 @@
 					strongsFound = false;
 					strongsStart = i+1;
 				}
-				if ((!strncmp(lastToken.c_str(), "seg", 3)) ||
-						(!strncmp(lastToken.c_str(), "verse", 5))) {
+				if ((!strncmp(lastToken.c_str(), "verse", 5))) {
 					intag = false;
 				}
 
-				// fix tenseChange to be inside <w> so we can include a subset of the <w> content.
-				if ((!strncmp(lastToken.c_str(), "MG", 2)) ||
+				if (            (!strncmp(lastToken.c_str(), "MG", 2)) ||
 						(!strncmp(lastToken.c_str(), "MH", 2))) {
+
+					// insert </w>
+					// fix tenseChange to be inside <w> so we can include a subset of the <w> content.
 					outstring.replace(tokenStart-1, lastToken.length()+2, ((tenseChange > -1) ? "</w></transChange>":"</w>"));
 					i = (tokenStart-1) + ((tenseChange > -1) ? 18:4);
+					
+					// build <w ... > tag
 					char lang = lastToken[1];	// H or G
 					lastToken.replace(0, 1, "<w lemma=\"strong:");
 					while ((start = lastToken.find(", ")) > -1) {
@@ -627,6 +636,8 @@
 					}
 					lastToken += "\">";
 					intag = false;
+
+
 					if (tenseChange > -1) {
 						lastToken.insert(0, "<transChange type=\"tenseChange\">");
 					}
@@ -635,11 +646,13 @@
 						intag = true;
 						i += lastToken.length() - 1; // (-1 because we're about to i++)
 					}
+
+					// insert our token
 					else {
 						outstring.insert(strongsStart, lastToken);
 						i += lastToken.length() - 1; // (-1 because we're about to i++)
 					}
-					strongsStart = i;
+					strongsStart = i+1;
 					strongsFound = false;
 					if (tenseChange > -1) {
 						// relocate because position may have changed from all the token inserts
@@ -653,6 +666,27 @@
 		}
 
 
+		// clean up stuff that didn't work quite right
+		while (1) {
+
+			// divineName strongs tags misorderings
+			string target = "</w></divineName></seg>";
+			size_t s = outstring.find(target);
+			if (s != string::npos) {
+				outstring.replace(s, target.length(), "</divineName></seg></w>");
+				continue;
+			}
+			target = "</w>,</divineName></seg>";
+			s = outstring.find(target);
+			if (s != string::npos) {
+				outstring.replace(s, target.length(), "</divineName></seg></w>,");
+				continue;
+			}
+
+			break;
+		}
+
+
 		std::cout << outstring;
 		if (!result) std::cout << "\n";
 	}
@@ -718,7 +752,7 @@
 			const char *found = strstr(outstr, "L\\{ORD'S}/");
 			int start = (found) ? (found - outstr) : -1;
 			if (start > -1) {
-				outstring.replace(start, 10, "<divineName>Lord's</divineName>");
+				outstring.replace(start, 10, "<seg><divineName>Lord's</divineName></seg>");
 				continue;
 			}
 
@@ -819,33 +853,49 @@
 				continue;
 			}
 
+			// ~“
+			string target = "~“";
+			s = outstring.find(target);
+			if (s != string::npos) {
+				outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"“\"/>");
+				continue;
+			}
 
+			// ~‘
+			target = "~‘";
+			s = outstring.find(target);
+			if (s != string::npos) {
+				outstring.replace(s, target.length(), "<milestone type=\"cQuote\" marker=\"‘\"/>");
+				continue;
+			}
+
+
 			const char *outstr = outstring.c_str();
 			const char *found = strstr(outstr, "L\\{ORD}/'\\{S}/");
 			int start = (found) ? (found - outstr) : -1;
 			if (start > -1) {
-				outstring.replace(start, 14, "<divineName>Lord's</divineName>");
+				outstring.replace(start, 14, "<seg><divineName>Lord's</divineName></seg>");
 				continue;
 			}
 			outstr = outstring.c_str();
 			found = strstr(outstr, "L\\{ORD}/");
 			start = (found) ? (found - outstr) : -1;
 			if (start > -1) {
-				outstring.replace(start, 8, "<divineName>Lord</divineName>,");
+				outstring.replace(start, 8, "<seg><divineName>Lord</divineName></seg>,");
 				continue;
 			}
 			outstr = outstring.c_str();
 			found = strstr(outstr, "Y\\{AH,}/");
 			start = (found) ? (found - outstr) : -1;
 			if (start > -1) {
-				outstring.replace(start, 8, "<divineName>Yah</divineName>,");
+				outstring.replace(start, 8, "<seg><divineName>Yah</divineName></seg>,");
 				continue;
 			}
 			outstr = outstring.c_str();
 			found = strstr(outstr, "Y\\{AH}/");
 			start = (found) ? (found - outstr) : -1;
 			if (start > -1) {
-				outstring.replace(start, 7, "<divineName>Yah</divineName>");
+				outstring.replace(start, 7, "<seg><divineName>Yah</divineName></seg>");
 				continue;
 			}
 			// is this really valid markup?  should 'also be' be in small
@@ -854,28 +904,28 @@
 			found = strstr(outstr, "L\\{ORD {also be}/}");
 			start = (found) ? (found - outstr) : -1;
 			if (start > -1) {
-				outstring.replace(start, 18, "<divineName>Lord</divineName> also be}");
+				outstring.replace(start, 18, "<seg><divineName>Lord</divineName></seg> also be}");
 				continue;
 			}
 			outstr = outstring.c_str();
 			found = strstr(outstr, "L\\{ORD {give}/}");
 			start = (found) ? (found - outstr) : -1;
 			if (start > -1) {
-				outstring.replace(start, 15, "<divineName>Lord</divineName> give}");
+				outstring.replace(start, 15, "<seg><divineName>Lord</divineName></seg> give}");
 				continue;
 			}
 			outstr = outstring.c_str();
 			found = strstr(outstr, "L\\{ORD {bless}/}");
 			start = (found) ? (found - outstr) : -1;
 			if (start > -1) {
-				outstring.replace(start, 16, "<divineName>Lord</divineName> bless}");
+				outstring.replace(start, 16, "<seg><divineName>Lord</divineName></seg> bless}");
 				continue;
 			}
 			outstr = outstring.c_str();
 			found = strstr(outstr, "L\\{ORD {are my Refuge; You have made the Most High your dwelling place}/}");
 			start = (found) ? (found - outstr) : -1;
 			if (start > -1) {
-				outstring.replace(start, 73, "<divineName>Lord</divineName> are my Refuge; You have made the Most High your dwelling place}");
+				outstring.replace(start, 73, "<seg><divineName>Lord</divineName></seg> are my Refuge; You have made the Most High your dwelling place}");
 				continue;
 			}
 			// LB ??? Don't have info on this.  Assuming '-'
@@ -903,8 +953,8 @@
 					start += 22;
 				}
 				else {
-					outstring.insert(start, "<divineName>");
-					start += 12;
+					outstring.insert(start, "<seg><divineName>");
+					start += 17;
 					const char *b = outstring.c_str();
 					const char *found = strstr(b, "L\\{ORD}/");
 					int s = (found) ? (found - b) : -1;
@@ -949,8 +999,8 @@
 					end+=6;
 				}
 				else {
-					outstring.insert(end, "</divineName>");
-					end+=13;
+					outstring.insert(end, "</divineName></seg>");
+					end+=19;
 				}
 				continue;
 			}