[sword-svn] r1981 - in trunk: include src/modules/filters
scribe at www.crosswire.org
scribe at www.crosswire.org
Wed Oct 4 02:28:31 MST 2006
Author: scribe
Date: 2006-10-04 02:28:28 -0700 (Wed, 04 Oct 2006)
New Revision: 1981
Modified:
trunk/include/swbasicfilter.h
trunk/src/modules/filters/osishtmlhref.cpp
trunk/src/modules/filters/osisosis.cpp
trunk/src/modules/filters/swbasicfilter.cpp
trunk/src/modules/filters/thmlgbf.cpp
trunk/src/modules/filters/thmlhtml.cpp
trunk/src/modules/filters/thmlhtmlhref.cpp
trunk/src/modules/filters/thmlosis.cpp
trunk/src/modules/filters/thmlplain.cpp
trunk/src/modules/filters/thmlrtf.cpp
Log:
Applied patch from DM Smith which adds better handling for escape sequences.
Modified: trunk/include/swbasicfilter.h
===================================================================
--- trunk/include/swbasicfilter.h 2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/include/swbasicfilter.h 2006-10-04 09:28:28 UTC (rev 1981)
@@ -70,6 +70,7 @@
bool tokenCaseSensitive;
bool passThruUnknownToken;
bool passThruUnknownEsc;
+ bool passThruNumericEsc;
char processStages;
@@ -116,14 +117,28 @@
*/
void setPassThruUnknownEscapeString(bool val);
+ /** Sets whether to pass thru a numeric escape sequence unchanged
+ * or allow it to be handled otherwise.
+ * Default is false.*/
+ void setPassThruNumericEscapeString(bool val);
+
/** Are escapeStrings case sensitive or not? Call this
* function before addEscapeStingSubstitute()
*/
void setEscapeStringCaseSensitive(bool val);
+ /** Registers an esc control sequence that can pass unchanged
+ */
+ void addAllowedEscapeString(const char *findString);
+
+ /** Unregisters an esc control sequence that can pass unchanged
+ */
+ void removeAllowedEscapeString(const char *findString);
+
/** Registers an esc control sequence
*/
void addEscapeStringSubstitute(const char *findString, const char *replaceString);
+
/** Unregisters an esc control sequence
*/
void removeEscapeStringSubstitute(const char *findString);
@@ -131,6 +146,12 @@
/** This function performs the substitution of escapeStrings */
bool substituteEscapeString(SWBuf &buf, const char *escString);
+ /** This passes allowed escapeStrings */
+ bool passAllowedEscapeString(SWBuf &buf, const char *escString);
+
+ /** This appends escString to buf as an entity */
+ void appendEscapeString(SWBuf &buf, const char *escString);
+
/** Are tokens case sensitive (like in GBF) or not? Call this
* function before addTokenSubstitute()
*/
@@ -168,6 +189,14 @@
*/
virtual bool handleEscapeString(SWBuf &buf, const char *escString, BasicFilterUserData *userData);
+ /** Called for every numeric escape sequence; passes it through unchanged when passThruNumericEsc is set.
+ * @param buf the output buffer
+ * @param escString the escape sequence (e.g. <code>"#235"</code> for &#235;)
+ * @return subclasses should return true if they handled the esc seq, or false if they did not.
+ */
+ virtual bool handleNumericEscapeString(SWBuf &buf, const char *escString);
+
+
};
SWORD_NAMESPACE_END
Modified: trunk/src/modules/filters/osishtmlhref.cpp
===================================================================
--- trunk/src/modules/filters/osishtmlhref.cpp 2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/osishtmlhref.cpp 2006-10-04 09:28:28 UTC (rev 1981)
@@ -63,15 +63,13 @@
setEscapeEnd(";");
setEscapeStringCaseSensitive(true);
+ setPassThruNumericEscapeString(true);
-// commenting these out. If someone is sure we shouldn't
-// convert these since we are outputing to a markup that
-// recognizes them, then please delete these lines
-// addEscapeStringSubstitute("amp", "&");
-// addEscapeStringSubstitute("apos", "'");
-// addEscapeStringSubstitute("lt", "<");
-// addEscapeStringSubstitute("gt", ">");
-// addEscapeStringSubstitute("quot", "\"");
+ addAllowedEscapeString("quot");
+ addAllowedEscapeString("apos");
+ addAllowedEscapeString("amp");
+ addAllowedEscapeString("lt");
+ addAllowedEscapeString("gt");
setTokenCaseSensitive(true);
Modified: trunk/src/modules/filters/osisosis.cpp
===================================================================
--- trunk/src/modules/filters/osisosis.cpp 2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/osisosis.cpp 2006-10-04 09:28:28 UTC (rev 1981)
@@ -35,6 +35,14 @@
setEscapeEnd(";");
setEscapeStringCaseSensitive(true);
+ setPassThruNumericEscapeString(true);
+
+ addAllowedEscapeString("quot");
+ addAllowedEscapeString("apos");
+ addAllowedEscapeString("amp");
+ addAllowedEscapeString("lt");
+ addAllowedEscapeString("gt");
+
setTokenCaseSensitive(true);
}
Modified: trunk/src/modules/filters/swbasicfilter.cpp
===================================================================
--- trunk/src/modules/filters/swbasicfilter.cpp 2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/swbasicfilter.cpp 2006-10-04 09:28:28 UTC (rev 1981)
@@ -29,16 +29,19 @@
#include <utilstr.h>
#include <stringmgr.h>
#include <map>
+#include <set>
SWORD_NAMESPACE_START
typedef std::map<SWBuf, SWBuf> DualStringMap;
+typedef std::set<SWBuf> StringSet;
// I hate bridge patterns but this isolates std::map from a ton of filters
class SWBasicFilter::Private {
public:
DualStringMap tokenSubMap;
DualStringMap escSubMap;
+ StringSet escPassSet;
};
const char SWBasicFilter::INITIALIZE = 1;
@@ -65,6 +68,7 @@
tokenCaseSensitive = false;
passThruUnknownToken = false;
passThruUnknownEsc = false;
+ passThruNumericEsc = false;
}
@@ -94,7 +98,11 @@
passThruUnknownEsc = val;
}
+void SWBasicFilter::setPassThruNumericEscapeString(bool val) {
+	passThruNumericEsc = val;	// the flag handleNumericEscapeString() tests; passThruUnknownEsc has its own setter and must not be touched here
+}
+
void SWBasicFilter::setTokenCaseSensitive(bool val) {
tokenCaseSensitive = val;
}
@@ -124,6 +132,24 @@
}
}
+void SWBasicFilter::addAllowedEscapeString(const char *findString) {
+	if (escStringCaseSensitive) {	// case-sensitive: register the name exactly as given
+		p->escPassSet.insert(StringSet::value_type(findString));
+		return;
+	}
+	char *upper = 0;	// case-insensitive: register an upper-cased copy of the name
+	stdstr(&upper, findString);
+	toupperstr(upper);
+	p->escPassSet.insert(StringSet::value_type(upper));
+	delete [] upper;
+}
+
+void SWBasicFilter::removeAllowedEscapeString(const char *findString) {
+	char *key = 0;
+	stdstr(&key, findString);	// private copy so the name can be normalised
+	if (!escStringCaseSensitive) toupperstr(key);	// addAllowedEscapeString() stores upper-cased names in this mode
+	p->escPassSet.erase(StringSet::key_type(key));	// erase-by-key: single lookup, no-op when the name is not registered
+	delete [] key;
+}
+
void SWBasicFilter::addEscapeStringSubstitute(const char *findString, const char *replaceString) {
char *buf = 0;
@@ -161,9 +187,51 @@
return false;
}
+void SWBasicFilter::appendEscapeString(SWBuf &buf, const char *escString) {	// re-emit escString to buf as a complete escape sequence
+ buf += escStart;	// opening delimiter configured for this filter
+ buf += escString;	// sequence body, unchanged
+ buf += escEnd;	// closing delimiter configured for this filter
+}
+
+bool SWBasicFilter::passAllowedEscapeString(SWBuf &buf, const char *escString) {
+	bool allowed;
+
+	if (escStringCaseSensitive) {
+		allowed = p->escPassSet.find(escString) != p->escPassSet.end();
+	}
+	else {
+		// names are registered upper-cased in this mode, so probe with an upper-cased copy
+		char *upper = 0;
+		stdstr(&upper, escString);
+		toupperstr(upper);
+		allowed = p->escPassSet.find(upper) != p->escPassSet.end();
+		delete [] upper;
+	}
+
+	if (!allowed) return false;
+	appendEscapeString(buf, escString);	// emit the caller's original spelling, not the upper-cased probe
+	return true;
+}
+
+bool SWBasicFilter::handleNumericEscapeString(SWBuf &buf, const char *escString) {
+	// Numeric sequences are either echoed verbatim or reported as unhandled.
+	if (!passThruNumericEsc) return false;
+
+	appendEscapeString(buf, escString);
+	return true;
+}
+
bool SWBasicFilter::substituteEscapeString(SWBuf &buf, const char *escString) {
DualStringMap::iterator it;
+ if (*escString == '#') {
+ return handleNumericEscapeString(buf, escString);
+ }
+
+ if (passAllowedEscapeString(buf, escString)) {
+ return true;
+ }
+
if (!escStringCaseSensitive) {
char *tmp = 0;
stdstr(&tmp, escString);
@@ -278,9 +346,7 @@
if (!userData->suspendTextPassThru) { //if text through is disabled no tokens should pass, too
if ((!handleEscapeString(text, token, userData)) && (passThruUnknownEsc)) {
- text += escStart;
- text += token;
- text += escEnd;
+ appendEscapeString(text, token);
}
}
escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0;
Modified: trunk/src/modules/filters/thmlgbf.cpp
===================================================================
--- trunk/src/modules/filters/thmlgbf.cpp 2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/thmlgbf.cpp 2006-10-04 09:28:28 UTC (rev 1981)
@@ -64,7 +64,7 @@
else if (!strncmp("amp", token, 3)) text += '&';
else if (!strncmp("lt", token, 2)) text += '<';
else if (!strncmp("gt", token, 2)) text += '>';
- else if (!strncmp("brvbar", token, 6)) text += '|';
+ else if (!strncmp("brvbar", token, 6)) text += '¦';
else if (!strncmp("sect", token, 4)) text += '§';
else if (!strncmp("copy", token, 4)) text += '©';
else if (!strncmp("laquo", token, 5)) text += '«';
@@ -155,6 +155,13 @@
else if (!strncmp("uml", token, 3)) text += '¨';
else if (!strncmp("shy", token, 3)) text += '';
else if (!strncmp("macr", token, 4)) text += '¯';
+ else if (!strncmp("micro", token, 5)) text += "µ";
+ else if (!strncmp("middot", token, 6)) text +="·";
+ else if (!strncmp("cedil", token, 5)) text += "¸";
+ else if (!strncmp("ordm", token, 4)) text += "º";
+ else if (!strncmp("times", token, 5)) text += "×";
+ else if (!strncmp("divide", token, 6)) text +="÷";
+ else if (!strncmp("oslash", token, 6)) text +="ø";
continue;
}
Modified: trunk/src/modules/filters/thmlhtml.cpp
===================================================================
--- trunk/src/modules/filters/thmlhtml.cpp 2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/thmlhtml.cpp 2006-10-04 09:28:28 UTC (rev 1981)
@@ -24,109 +24,119 @@
ThMLHTML::ThMLHTML() {
setTokenStart("<");
setTokenEnd(">");
-/*
+
setEscapeStart("&");
setEscapeEnd(";");
setEscapeStringCaseSensitive(true);
+ setPassThruNumericEscapeString(true);
- addEscapeStringSubstitute("nbsp", " ");
- addEscapeStringSubstitute("quot", "\"");
- addEscapeStringSubstitute("amp", "&");
- addEscapeStringSubstitute("lt", "<");
- addEscapeStringSubstitute("gt", ">");
- addEscapeStringSubstitute("brvbar", "|");
- addEscapeStringSubstitute("sect", "§");
- addEscapeStringSubstitute("copy", "©");
- addEscapeStringSubstitute("laquo", "«");
- addEscapeStringSubstitute("reg", "®");
- addEscapeStringSubstitute("acute", "´");
- addEscapeStringSubstitute("para", "¶");
- addEscapeStringSubstitute("raquo", "»");
+ addAllowedEscapeString("quot");
+ addAllowedEscapeString("amp");
+ addAllowedEscapeString("lt");
+ addAllowedEscapeString("gt");
- addEscapeStringSubstitute("Aacute", "Á");
- addEscapeStringSubstitute("Agrave", "À");
- addEscapeStringSubstitute("Acirc", "Â");
- addEscapeStringSubstitute("Auml", "Ä");
- addEscapeStringSubstitute("Atilde", "Ã");
- addEscapeStringSubstitute("Aring", "Å");
- addEscapeStringSubstitute("aacute", "á");
- addEscapeStringSubstitute("agrave", "à");
- addEscapeStringSubstitute("acirc", "â");
- addEscapeStringSubstitute("auml", "ä");
- addEscapeStringSubstitute("atilde", "ã");
- addEscapeStringSubstitute("aring", "å");
- addEscapeStringSubstitute("Eacute", "É");
- addEscapeStringSubstitute("Egrave", "È");
- addEscapeStringSubstitute("Ecirc", "Ê");
- addEscapeStringSubstitute("Euml", "Ë");
- addEscapeStringSubstitute("eacute", "é");
- addEscapeStringSubstitute("egrave", "è");
- addEscapeStringSubstitute("ecirc", "ê");
- addEscapeStringSubstitute("euml", "ë");
- addEscapeStringSubstitute("Iacute", "Í");
- addEscapeStringSubstitute("Igrave", "Ì");
- addEscapeStringSubstitute("Icirc", "Î");
- addEscapeStringSubstitute("Iuml", "Ï");
- addEscapeStringSubstitute("iacute", "í");
- addEscapeStringSubstitute("igrave", "ì");
- addEscapeStringSubstitute("icirc", "î");
- addEscapeStringSubstitute("iuml", "ï");
- addEscapeStringSubstitute("Oacute", "Ó");
- addEscapeStringSubstitute("Ograve", "Ò");
- addEscapeStringSubstitute("Ocirc", "Ô");
- addEscapeStringSubstitute("Ouml", "Ö");
- addEscapeStringSubstitute("Otilde", "Õ");
- addEscapeStringSubstitute("oacute", "ó");
- addEscapeStringSubstitute("ograve", "ò");
- addEscapeStringSubstitute("ocirc", "ô");
- addEscapeStringSubstitute("ouml", "ö");
- addEscapeStringSubstitute("otilde", "õ");
- addEscapeStringSubstitute("Uacute", "Ú");
- addEscapeStringSubstitute("Ugrave", "Ù");
- addEscapeStringSubstitute("Ucirc", "Û");
- addEscapeStringSubstitute("Uuml", "Ü");
- addEscapeStringSubstitute("uacute", "ú");
- addEscapeStringSubstitute("ugrave", "ù");
- addEscapeStringSubstitute("ucirc", "û");
- addEscapeStringSubstitute("uuml", "ü");
- addEscapeStringSubstitute("Yacute", "Ý");
- addEscapeStringSubstitute("yacute", "ý");
- addEscapeStringSubstitute("yuml", "ÿ");
+ addAllowedEscapeString("nbsp");
+ addAllowedEscapeString("brvbar"); // "¦"
+ addAllowedEscapeString("sect"); // "§"
+ addAllowedEscapeString("copy"); // "©"
+ addAllowedEscapeString("laquo"); // "«"
+ addAllowedEscapeString("reg"); // "®"
+ addAllowedEscapeString("acute"); // "´"
+ addAllowedEscapeString("para"); // "¶"
+ addAllowedEscapeString("raquo"); // "»"
- addEscapeStringSubstitute("deg", "°");
- addEscapeStringSubstitute("plusmn", "±");
- addEscapeStringSubstitute("sup2", "²");
- addEscapeStringSubstitute("sup3", "³");
- addEscapeStringSubstitute("sup1", "¹");
- addEscapeStringSubstitute("nbsp", "º");
- addEscapeStringSubstitute("pound", "£");
- addEscapeStringSubstitute("cent", "¢");
- addEscapeStringSubstitute("frac14", "¼");
- addEscapeStringSubstitute("frac12", "½");
- addEscapeStringSubstitute("frac34", "¾");
- addEscapeStringSubstitute("iquest", "¿");
- addEscapeStringSubstitute("iexcl", "¡");
- addEscapeStringSubstitute("ETH", "Ð");
- addEscapeStringSubstitute("eth", "ð");
- addEscapeStringSubstitute("THORN", "Þ");
- addEscapeStringSubstitute("thorn", "þ");
- addEscapeStringSubstitute("AElig", "Æ");
- addEscapeStringSubstitute("aelig", "æ");
- addEscapeStringSubstitute("Oslash", "Ø");
- addEscapeStringSubstitute("curren", "¤");
- addEscapeStringSubstitute("Ccedil", "Ç");
- addEscapeStringSubstitute("ccedil", "ç");
- addEscapeStringSubstitute("szlig", "ß");
- addEscapeStringSubstitute("Ntilde", "Ñ");
- addEscapeStringSubstitute("ntilde", "ñ");
- addEscapeStringSubstitute("yen", "¥");
- addEscapeStringSubstitute("not", "¬");
- addEscapeStringSubstitute("ordf", "ª");
- addEscapeStringSubstitute("uml", "¨");
- addEscapeStringSubstitute("shy", "");
- addEscapeStringSubstitute("macr", "¯");
-*/
+ addAllowedEscapeString("Aacute"); // "Á"
+ addAllowedEscapeString("Agrave"); // "À"
+ addAllowedEscapeString("Acirc"); // "Â"
+ addAllowedEscapeString("Auml"); // "Ä"
+ addAllowedEscapeString("Atilde"); // "Ã"
+ addAllowedEscapeString("Aring"); // "Å"
+ addAllowedEscapeString("aacute"); // "á"
+ addAllowedEscapeString("agrave"); // "à"
+ addAllowedEscapeString("acirc"); // "â"
+ addAllowedEscapeString("auml"); // "ä"
+ addAllowedEscapeString("atilde"); // "ã"
+ addAllowedEscapeString("aring"); // "å"
+ addAllowedEscapeString("Eacute"); // "É"
+ addAllowedEscapeString("Egrave"); // "È"
+ addAllowedEscapeString("Ecirc"); // "Ê"
+ addAllowedEscapeString("Euml"); // "Ë"
+ addAllowedEscapeString("eacute"); // "é"
+ addAllowedEscapeString("egrave"); // "è"
+ addAllowedEscapeString("ecirc"); // "ê"
+ addAllowedEscapeString("euml"); // "ë"
+ addAllowedEscapeString("Iacute"); // "Í"
+ addAllowedEscapeString("Igrave"); // "Ì"
+ addAllowedEscapeString("Icirc"); // "Î"
+ addAllowedEscapeString("Iuml"); // "Ï"
+ addAllowedEscapeString("iacute"); // "í"
+ addAllowedEscapeString("igrave"); // "ì"
+ addAllowedEscapeString("icirc"); // "î"
+ addAllowedEscapeString("iuml"); // "ï"
+ addAllowedEscapeString("Oacute"); // "Ó"
+ addAllowedEscapeString("Ograve"); // "Ò"
+ addAllowedEscapeString("Ocirc"); // "Ô"
+ addAllowedEscapeString("Ouml"); // "Ö"
+ addAllowedEscapeString("Otilde"); // "Õ"
+ addAllowedEscapeString("oacute"); // "ó"
+ addAllowedEscapeString("ograve"); // "ò"
+ addAllowedEscapeString("ocirc"); // "ô"
+ addAllowedEscapeString("ouml"); // "ö"
+ addAllowedEscapeString("otilde"); // "õ"
+ addAllowedEscapeString("Uacute"); // "Ú"
+ addAllowedEscapeString("Ugrave"); // "Ù"
+ addAllowedEscapeString("Ucirc"); // "Û"
+ addAllowedEscapeString("Uuml"); // "Ü"
+ addAllowedEscapeString("uacute"); // "ú"
+ addAllowedEscapeString("ugrave"); // "ù"
+ addAllowedEscapeString("ucirc"); // "û"
+ addAllowedEscapeString("uuml"); // "ü"
+ addAllowedEscapeString("Yacute"); // "Ý"
+ addAllowedEscapeString("yacute"); // "ý"
+ addAllowedEscapeString("yuml"); // "ÿ"
+
+ addAllowedEscapeString("deg"); // "°"
+ addAllowedEscapeString("plusmn"); // "±"
+ addAllowedEscapeString("sup2"); // "²"
+ addAllowedEscapeString("sup3"); // "³"
+ addAllowedEscapeString("sup1"); // "¹"
+ addAllowedEscapeString("nbsp"); // "º"
+ addAllowedEscapeString("pound"); // "£"
+ addAllowedEscapeString("cent"); // "¢"
+ addAllowedEscapeString("frac14"); // "¼"
+ addAllowedEscapeString("frac12"); // "½"
+ addAllowedEscapeString("frac34"); // "¾"
+ addAllowedEscapeString("iquest"); // "¿"
+ addAllowedEscapeString("iexcl"); // "¡"
+ addAllowedEscapeString("ETH"); // "Ð"
+ addAllowedEscapeString("eth"); // "ð"
+ addAllowedEscapeString("THORN"); // "Þ"
+ addAllowedEscapeString("thorn"); // "þ"
+ addAllowedEscapeString("AElig"); // "Æ"
+ addAllowedEscapeString("aelig"); // "æ"
+ addAllowedEscapeString("Oslash"); // "Ø"
+ addAllowedEscapeString("curren"); // "¤"
+ addAllowedEscapeString("Ccedil"); // "Ç"
+ addAllowedEscapeString("ccedil"); // "ç"
+ addAllowedEscapeString("szlig"); // "ß"
+ addAllowedEscapeString("Ntilde"); // "Ñ"
+ addAllowedEscapeString("ntilde"); // "ñ"
+ addAllowedEscapeString("yen"); // "¥"
+ addAllowedEscapeString("not"); // "¬"
+ addAllowedEscapeString("ordf"); // "ª"
+ addAllowedEscapeString("uml"); // "¨"
+ addAllowedEscapeString("shy"); // ""
+ addAllowedEscapeString("macr"); // "¯"
+
+ addAllowedEscapeString("micro"); // "µ"
+ addAllowedEscapeString("middot"); // "·"
+ addAllowedEscapeString("cedil"); // "¸"
+ addAllowedEscapeString("ordm"); // "º"
+ addAllowedEscapeString("times"); // "×"
+ addAllowedEscapeString("divide"); // "÷"
+ addAllowedEscapeString("oslash"); // "ø"
+
setTokenCaseSensitive(true);
addTokenSubstitute("note", " <font color=\"#800000\"><small>(");
Modified: trunk/src/modules/filters/thmlhtmlhref.cpp
===================================================================
--- trunk/src/modules/filters/thmlhtmlhref.cpp 2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/thmlhtmlhref.cpp 2006-10-04 09:28:28 UTC (rev 1981)
@@ -38,6 +38,118 @@
setTokenStart("<");
setTokenEnd(">");
+ setEscapeStart("&");
+ setEscapeEnd(";");
+
+ setEscapeStringCaseSensitive(true);
+ setPassThruNumericEscapeString(true);
+
+ addAllowedEscapeString("quot");
+ addAllowedEscapeString("amp");
+ addAllowedEscapeString("lt");
+ addAllowedEscapeString("gt");
+
+ addAllowedEscapeString("nbsp");
+ addAllowedEscapeString("brvbar"); // "¦"
+ addAllowedEscapeString("sect"); // "§"
+ addAllowedEscapeString("copy"); // "©"
+ addAllowedEscapeString("laquo"); // "«"
+ addAllowedEscapeString("reg"); // "®"
+ addAllowedEscapeString("acute"); // "´"
+ addAllowedEscapeString("para"); // "¶"
+ addAllowedEscapeString("raquo"); // "»"
+
+ addAllowedEscapeString("Aacute"); // "Á"
+ addAllowedEscapeString("Agrave"); // "À"
+ addAllowedEscapeString("Acirc"); // "Â"
+ addAllowedEscapeString("Auml"); // "Ä"
+ addAllowedEscapeString("Atilde"); // "Ã"
+ addAllowedEscapeString("Aring"); // "Å"
+ addAllowedEscapeString("aacute"); // "á"
+ addAllowedEscapeString("agrave"); // "à"
+ addAllowedEscapeString("acirc"); // "â"
+ addAllowedEscapeString("auml"); // "ä"
+ addAllowedEscapeString("atilde"); // "ã"
+ addAllowedEscapeString("aring"); // "å"
+ addAllowedEscapeString("Eacute"); // "É"
+ addAllowedEscapeString("Egrave"); // "È"
+ addAllowedEscapeString("Ecirc"); // "Ê"
+ addAllowedEscapeString("Euml"); // "Ë"
+ addAllowedEscapeString("eacute"); // "é"
+ addAllowedEscapeString("egrave"); // "è"
+ addAllowedEscapeString("ecirc"); // "ê"
+ addAllowedEscapeString("euml"); // "ë"
+ addAllowedEscapeString("Iacute"); // "Í"
+ addAllowedEscapeString("Igrave"); // "Ì"
+ addAllowedEscapeString("Icirc"); // "Î"
+ addAllowedEscapeString("Iuml"); // "Ï"
+ addAllowedEscapeString("iacute"); // "í"
+ addAllowedEscapeString("igrave"); // "ì"
+ addAllowedEscapeString("icirc"); // "î"
+ addAllowedEscapeString("iuml"); // "ï"
+ addAllowedEscapeString("Oacute"); // "Ó"
+ addAllowedEscapeString("Ograve"); // "Ò"
+ addAllowedEscapeString("Ocirc"); // "Ô"
+ addAllowedEscapeString("Ouml"); // "Ö"
+ addAllowedEscapeString("Otilde"); // "Õ"
+ addAllowedEscapeString("oacute"); // "ó"
+ addAllowedEscapeString("ograve"); // "ò"
+ addAllowedEscapeString("ocirc"); // "ô"
+ addAllowedEscapeString("ouml"); // "ö"
+ addAllowedEscapeString("otilde"); // "õ"
+ addAllowedEscapeString("Uacute"); // "Ú"
+ addAllowedEscapeString("Ugrave"); // "Ù"
+ addAllowedEscapeString("Ucirc"); // "Û"
+ addAllowedEscapeString("Uuml"); // "Ü"
+ addAllowedEscapeString("uacute"); // "ú"
+ addAllowedEscapeString("ugrave"); // "ù"
+ addAllowedEscapeString("ucirc"); // "û"
+ addAllowedEscapeString("uuml"); // "ü"
+ addAllowedEscapeString("Yacute"); // "Ý"
+ addAllowedEscapeString("yacute"); // "ý"
+ addAllowedEscapeString("yuml"); // "ÿ"
+
+ addAllowedEscapeString("deg"); // "°"
+ addAllowedEscapeString("plusmn"); // "±"
+ addAllowedEscapeString("sup2"); // "²"
+ addAllowedEscapeString("sup3"); // "³"
+ addAllowedEscapeString("sup1"); // "¹"
+ addAllowedEscapeString("nbsp"); // "º"
+ addAllowedEscapeString("pound"); // "£"
+ addAllowedEscapeString("cent"); // "¢"
+ addAllowedEscapeString("frac14"); // "¼"
+ addAllowedEscapeString("frac12"); // "½"
+ addAllowedEscapeString("frac34"); // "¾"
+ addAllowedEscapeString("iquest"); // "¿"
+ addAllowedEscapeString("iexcl"); // "¡"
+ addAllowedEscapeString("ETH"); // "Ð"
+ addAllowedEscapeString("eth"); // "ð"
+ addAllowedEscapeString("THORN"); // "Þ"
+ addAllowedEscapeString("thorn"); // "þ"
+ addAllowedEscapeString("AElig"); // "Æ"
+ addAllowedEscapeString("aelig"); // "æ"
+ addAllowedEscapeString("Oslash"); // "Ø"
+ addAllowedEscapeString("curren"); // "¤"
+ addAllowedEscapeString("Ccedil"); // "Ç"
+ addAllowedEscapeString("ccedil"); // "ç"
+ addAllowedEscapeString("szlig"); // "ß"
+ addAllowedEscapeString("Ntilde"); // "Ñ"
+ addAllowedEscapeString("ntilde"); // "ñ"
+ addAllowedEscapeString("yen"); // "¥"
+ addAllowedEscapeString("not"); // "¬"
+ addAllowedEscapeString("ordf"); // "ª"
+ addAllowedEscapeString("uml"); // "¨"
+ addAllowedEscapeString("shy"); // ""
+ addAllowedEscapeString("macr"); // "¯"
+
+ addAllowedEscapeString("micro"); // "µ"
+ addAllowedEscapeString("middot"); // "·"
+ addAllowedEscapeString("cedil"); // "¸"
+ addAllowedEscapeString("ordm"); // "º"
+ addAllowedEscapeString("times"); // "×"
+ addAllowedEscapeString("divide"); // "÷"
+ addAllowedEscapeString("oslash"); // "ø"
+
setTokenCaseSensitive(true);
addTokenSubstitute("scripture", "<i> ");
addTokenSubstitute("/scripture", "</i> ");
Modified: trunk/src/modules/filters/thmlosis.cpp
===================================================================
--- trunk/src/modules/filters/thmlosis.cpp 2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/thmlosis.cpp 2006-10-04 09:28:28 UTC (rev 1981)
@@ -32,6 +32,7 @@
int tokpos = 0;
bool intoken = false;
bool keepToken = false;
+ bool ampersand = false;
// static QuoteStack quoteStack;
@@ -74,13 +75,138 @@
token[0] = 0;
token[1] = 0;
token[2] = 0;
+ ampersand = false;
textEnd = from-1;
wordEnd = text.c_str() + text.length();//not good, instead of wordEnd = to!
// wordEnd = to;
continue;
}
-
+
+ if (*from == '&') {
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ ampersand = true;
+ continue;
+ }
+
+ if (*from == ';' && ampersand) {
+ intoken = false;
+ ampersand = false;
+
+ if (*token == '#') {
+ text += '&';
+ text += token;
+ text += ';';
+ }
+ else if (!strncmp("nbsp", token, 4)) text += ' ';
+ else if (!strncmp("quot", token, 4)) text += '"';
+ else if (!strncmp("amp", token, 3)) text += '&';
+ else if (!strncmp("lt", token, 2)) text += '<';
+ else if (!strncmp("gt", token, 2)) text += '>';
+ else if (!strncmp("brvbar", token, 6)) text += '¦';
+ else if (!strncmp("sect", token, 4)) text += '§';
+ else if (!strncmp("copy", token, 4)) text += '©';
+ else if (!strncmp("laquo", token, 5)) text += '«';
+ else if (!strncmp("reg", token, 3)) text += '®';
+ else if (!strncmp("acute", token, 5)) text += '´';
+ else if (!strncmp("para", token, 4)) text += '¶';
+ else if (!strncmp("raquo", token, 5)) text += '»';
+ else if (!strncmp("Aacute", token, 6)) text += 'Á';
+ else if (!strncmp("Agrave", token, 6)) text += 'À';
+ else if (!strncmp("Acirc", token, 5)) text += 'Â';
+ else if (!strncmp("Auml", token, 4)) text += 'Ä';
+ else if (!strncmp("Atilde", token, 6)) text += 'Ã';
+ else if (!strncmp("Aring", token, 5)) text += 'Å';
+ else if (!strncmp("aacute", token, 6)) text += 'á';
+ else if (!strncmp("agrave", token, 6)) text += 'à';
+ else if (!strncmp("acirc", token, 5)) text += 'â';
+ else if (!strncmp("auml", token, 4)) text += 'ä';
+ else if (!strncmp("atilde", token, 6)) text += 'ã';
+ else if (!strncmp("aring", token, 5)) text += 'å';
+ else if (!strncmp("Eacute", token, 6)) text += 'É';
+ else if (!strncmp("Egrave", token, 6)) text += 'È';
+ else if (!strncmp("Ecirc", token, 5)) text += 'Ê';
+ else if (!strncmp("Euml", token, 4)) text += 'Ë';
+ else if (!strncmp("eacute", token, 6)) text += 'é';
+ else if (!strncmp("egrave", token, 6)) text += 'è';
+ else if (!strncmp("ecirc", token, 5)) text += 'ê';
+ else if (!strncmp("euml", token, 4)) text += 'ë';
+ else if (!strncmp("Iacute", token, 6)) text += 'Í';
+ else if (!strncmp("Igrave", token, 6)) text += 'Ì';
+ else if (!strncmp("Icirc", token, 5)) text += 'Î';
+ else if (!strncmp("Iuml", token, 4)) text += 'Ï';
+ else if (!strncmp("iacute", token, 6)) text += 'í';
+ else if (!strncmp("igrave", token, 6)) text += 'ì';
+ else if (!strncmp("icirc", token, 5)) text += 'î';
+ else if (!strncmp("iuml", token, 4)) text += 'ï';
+ else if (!strncmp("Oacute", token, 6)) text += 'Ó';
+ else if (!strncmp("Ograve", token, 6)) text += 'Ò';
+ else if (!strncmp("Ocirc", token, 5)) text += 'Ô';
+ else if (!strncmp("Ouml", token, 4)) text += 'Ö';
+ else if (!strncmp("Otilde", token, 6)) text += 'Õ';
+ else if (!strncmp("oacute", token, 6)) text += 'ó';
+ else if (!strncmp("ograve", token, 6)) text += 'ò';
+ else if (!strncmp("ocirc", token, 5)) text += 'ô';
+ else if (!strncmp("ouml", token, 4)) text += 'ö';
+ else if (!strncmp("otilde", token, 6)) text += 'õ';
+ else if (!strncmp("Uacute", token, 6)) text += 'Ú';
+ else if (!strncmp("Ugrave", token, 6)) text += 'Ù';
+ else if (!strncmp("Ucirc", token, 5)) text += 'Û';
+ else if (!strncmp("Uuml", token, 4)) text += 'Ü';
+ else if (!strncmp("uacute", token, 6)) text += 'ú';
+ else if (!strncmp("ugrave", token, 6)) text += 'ù';
+ else if (!strncmp("ucirc", token, 5)) text += 'û';
+ else if (!strncmp("uuml", token, 4)) text += 'ü';
+ else if (!strncmp("Yacute", token, 6)) text += 'Ý';
+ else if (!strncmp("yacute", token, 6)) text += 'ý';
+ else if (!strncmp("yuml", token, 4)) text += 'ÿ';
+
+ else if (!strncmp("deg", token, 3)) text += '°';
+ else if (!strncmp("plusmn", token, 6)) text += '±';
+ else if (!strncmp("sup2", token, 4)) text += '²';
+ else if (!strncmp("sup3", token, 4)) text += '³';
+ else if (!strncmp("sup1", token, 4)) text += '¹';
+ else if (!strncmp("nbsp", token, 4)) text += 'º';
+ else if (!strncmp("pound", token, 5)) text += '£';
+ else if (!strncmp("cent", token, 4)) text += '¢';
+ else if (!strncmp("frac14", token, 6)) text += '¼';
+ else if (!strncmp("frac12", token, 6)) text += '½';
+ else if (!strncmp("frac34", token, 6)) text += '¾';
+ else if (!strncmp("iquest", token, 6)) text += '¿';
+ else if (!strncmp("iexcl", token, 5)) text += '¡';
+ else if (!strncmp("ETH", token, 3)) text += 'Ð';
+ else if (!strncmp("eth", token, 3)) text += 'ð';
+ else if (!strncmp("THORN", token, 5)) text += 'Þ';
+ else if (!strncmp("thorn", token, 5)) text += 'þ';
+ else if (!strncmp("AElig", token, 5)) text += 'Æ';
+ else if (!strncmp("aelig", token, 5)) text += 'æ';
+ else if (!strncmp("Oslash", token, 6)) text += 'Ø';
+ else if (!strncmp("curren", token, 6)) text += '¤';
+ else if (!strncmp("Ccedil", token, 6)) text += 'Ç';
+ else if (!strncmp("ccedil", token, 6)) text += 'ç';
+ else if (!strncmp("szlig", token, 5)) text += 'ß';
+ else if (!strncmp("Ntilde", token, 6)) text += 'Ñ';
+ else if (!strncmp("ntilde", token, 6)) text += 'ñ';
+ else if (!strncmp("yen", token, 3)) text += '¥';
+ else if (!strncmp("not", token, 3)) text += '¬';
+ else if (!strncmp("ordf", token, 4)) text += 'ª';
+ else if (!strncmp("uml", token, 3)) text += '¨';
+ else if (!strncmp("shy", token, 3)) text += '';
+ else if (!strncmp("macr", token, 4)) text += '¯';
+ else if (!strncmp("micro", token, 5)) text += "µ";
+ else if (!strncmp("middot", token, 6)) text +="·";
+ else if (!strncmp("cedil", token, 5)) text += "¸";
+ else if (!strncmp("ordm", token, 4)) text += "º";
+ else if (!strncmp("times", token, 5)) text += "×";
+ else if (!strncmp("divide", token, 6)) text +="÷";
+ else if (!strncmp("oslash", token, 6)) text +="ø";
+ continue;
+ }
+
// handle silly <variant word> items in greek whnu, remove when module is fixed
if ((*from == '>') && (*(from-1) < 0)) {
text += ">";
Modified: trunk/src/modules/filters/thmlplain.cpp
===================================================================
--- trunk/src/modules/filters/thmlplain.cpp 2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/thmlplain.cpp 2006-10-04 09:28:28 UTC (rev 1981)
@@ -48,13 +48,14 @@
}
if (*from == ';' && ampersand) {
intoken = false;
+ ampersand = false;
if (!strncmp("nbsp", token, 4)) text += ' ';
else if (!strncmp("quot", token, 4)) text += '"';
else if (!strncmp("amp", token, 3)) text += '&';
else if (!strncmp("lt", token, 2)) text += '<';
else if (!strncmp("gt", token, 2)) text += '>';
- else if (!strncmp("brvbar", token, 6)) text += '|';
+ else if (!strncmp("brvbar", token, 6)) text += '¦';
else if (!strncmp("sect", token, 4)) text += '§';
else if (!strncmp("copy", token, 4)) text += '©';
else if (!strncmp("laquo", token, 5)) text += '«';
@@ -145,6 +146,13 @@
else if (!strncmp("uml", token, 3)) text += '¨';
else if (!strncmp("shy", token, 3)) text += '';
else if (!strncmp("macr", token, 4)) text += '¯';
+ else if (!strncmp("micro", token, 5)) text += "µ";
+ else if (!strncmp("middot", token, 6)) text +="·";
+ else if (!strncmp("cedil", token, 5)) text += "¸";
+ else if (!strncmp("ordm", token, 4)) text += "º";
+ else if (!strncmp("times", token, 5)) text += "×";
+ else if (!strncmp("divide", token, 6)) text +="÷";
+ else if (!strncmp("oslash", token, 6)) text +="ø";
continue;
}
Modified: trunk/src/modules/filters/thmlrtf.cpp
===================================================================
--- trunk/src/modules/filters/thmlrtf.cpp 2006-09-25 19:07:19 UTC (rev 1980)
+++ trunk/src/modules/filters/thmlrtf.cpp 2006-10-04 09:28:28 UTC (rev 1981)
@@ -38,7 +38,7 @@
addEscapeStringSubstitute("amp", "&");
addEscapeStringSubstitute("lt", "<");
addEscapeStringSubstitute("gt", ">");
- addEscapeStringSubstitute("brvbar", "|");
+ addEscapeStringSubstitute("brvbar", "¦");
addEscapeStringSubstitute("sect", "§");
addEscapeStringSubstitute("copy", "©");
addEscapeStringSubstitute("laquo", "«");
@@ -130,6 +130,14 @@
addEscapeStringSubstitute("shy", "");
addEscapeStringSubstitute("macr", "¯");
+ addEscapeStringSubstitute("micro", "µ");
+ addEscapeStringSubstitute("middot", "·");
+ addEscapeStringSubstitute("cedil", "¸");
+ addEscapeStringSubstitute("ordm", "º");
+ addEscapeStringSubstitute("times", "×");
+ addEscapeStringSubstitute("divide", "÷");
+ addEscapeStringSubstitute("oslash", "ø");
+
setTokenCaseSensitive(true);
More information about the sword-cvs
mailing list