[sword-cvs] sword/src/modules/filters thmlolb.cpp,1.5,1.6
sword@www.crosswire.org
sword@www.crosswire.org
Sat, 22 Feb 2003 00:25:06 -0700
Update of /usr/local/cvsroot/sword/src/modules/filters
In directory www:/tmp/cvs-serv24209
Modified Files:
thmlolb.cpp
Log Message:
changed to SWBasicFilter
and SWBuf
Index: thmlolb.cpp
===================================================================
RCS file: /usr/local/cvsroot/sword/src/modules/filters/thmlolb.cpp,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** thmlolb.cpp 1 Oct 2002 19:52:40 -0000 1.5
--- thmlolb.cpp 22 Feb 2003 07:25:04 -0000 1.6
***************
*** 23,242 ****
ThMLOLB::ThMLOLB()
{
}
! char ThMLOLB::ProcessText(char *text, int maxlen)
! {
! char *to, *from, token[2048];
! int tokpos = 0;
! bool intoken = false;
! int len;
! bool ampersand = false;
! int i;
!
! len = strlen(text) + 1; // shift string to right of buffer
! if (len < maxlen) {
! memmove(&text[maxlen - len], text, len);
! from = &text[maxlen - len];
! }
! else from = text; // -------------------------------
! for (to = text; *from; from++)
! {
! if (*from == '<') {
! intoken = true;
! tokpos = 0;
! memset(token, 0, 2048);
! ampersand = false;
! continue;
! }
! else if (*from == '&') {
! intoken = true;
! tokpos = 0;
! memset(token, 0, 2048);
! ampersand = true;
! continue;
! }
! if (*from == ';' && ampersand) {
! intoken = false;
!
! if (!strncmp("nbsp", token, 4)) *to++ = ' ';
! else if (!strncmp("quot", token, 4)) *to++ = '"';
! else if (!strncmp("amp", token, 3)) *to++ = '&';
! else if (!strncmp("lt", token, 2)) *to++ = '<';
! else if (!strncmp("gt", token, 2)) *to++ = '>';
! else if (!strncmp("brvbar", token, 6)) *to++ = '|';
! else if (!strncmp("sect", token, 4)) *to++ = '§';
! else if (!strncmp("copy", token, 4)) *to++ = '©';
! else if (!strncmp("laquo", token, 5)) *to++ = '«';
! else if (!strncmp("reg", token, 3)) *to++ = '®';
! else if (!strncmp("acute", token, 5)) *to++ = '´';
! else if (!strncmp("para", token, 4)) *to++ = '¶';
! else if (!strncmp("raquo", token, 5)) *to++ = '»';
!
! else if (!strncmp("Aacute", token, 6)) *to++ = 'Á';
! else if (!strncmp("Agrave", token, 6)) *to++ = 'À';
! else if (!strncmp("Acirc", token, 5)) *to++ = 'Â';
! else if (!strncmp("Auml", token, 4)) *to++ = 'Ä';
! else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã';
! else if (!strncmp("Aring", token, 5)) *to++ = 'Å';
! else if (!strncmp("aacute", token, 6)) *to++ = 'á';
! else if (!strncmp("agrave", token, 6)) *to++ = 'à';
! else if (!strncmp("acirc", token, 5)) *to++ = 'â';
! else if (!strncmp("auml", token, 4)) *to++ = 'ä';
! else if (!strncmp("atilde", token, 6)) *to++ = 'ã';
! else if (!strncmp("aring", token, 5)) *to++ = 'å';
! else if (!strncmp("Eacute", token, 6)) *to++ = 'É';
! else if (!strncmp("Egrave", token, 6)) *to++ = 'È';
! else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê';
! else if (!strncmp("Euml", token, 4)) *to++ = 'Ë';
! else if (!strncmp("eacute", token, 6)) *to++ = 'é';
! else if (!strncmp("egrave", token, 6)) *to++ = 'è';
! else if (!strncmp("ecirc", token, 5)) *to++ = 'ê';
! else if (!strncmp("euml", token, 4)) *to++ = 'ë';
! else if (!strncmp("Iacute", token, 6)) *to++ = 'Í';
! else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì';
! else if (!strncmp("Icirc", token, 5)) *to++ = 'Î';
! else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï';
! else if (!strncmp("iacute", token, 6)) *to++ = 'í';
! else if (!strncmp("igrave", token, 6)) *to++ = 'ì';
! else if (!strncmp("icirc", token, 5)) *to++ = 'î';
! else if (!strncmp("iuml", token, 4)) *to++ = 'ï';
! else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó';
! else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò';
! else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô';
! else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö';
! else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ';
! else if (!strncmp("oacute", token, 6)) *to++ = 'ó';
! else if (!strncmp("ograve", token, 6)) *to++ = 'ò';
! else if (!strncmp("ocirc", token, 5)) *to++ = 'ô';
! else if (!strncmp("ouml", token, 4)) *to++ = 'ö';
! else if (!strncmp("otilde", token, 6)) *to++ = 'õ';
! else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú';
! else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù';
! else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û';
! else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü';
! else if (!strncmp("uacute", token, 6)) *to++ = 'ú';
! else if (!strncmp("ugrave", token, 6)) *to++ = 'ù';
! else if (!strncmp("ucirc", token, 5)) *to++ = 'û';
! else if (!strncmp("uuml", token, 4)) *to++ = 'ü';
! else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý';
! else if (!strncmp("yacute", token, 6)) *to++ = 'ý';
! else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ';
!
! else if (!strncmp("deg", token, 3)) *to++ = '°';
! else if (!strncmp("plusmn", token, 6)) *to++ = '±';
! else if (!strncmp("sup2", token, 4)) *to++ = '²';
! else if (!strncmp("sup3", token, 4)) *to++ = '³';
! else if (!strncmp("sup1", token, 4)) *to++ = '¹';
! else if (!strncmp("nbsp", token, 4)) *to++ = 'º';
! else if (!strncmp("pound", token, 5)) *to++ = '£';
! else if (!strncmp("cent", token, 4)) *to++ = '¢';
! else if (!strncmp("frac14", token, 6)) *to++ = '¼';
! else if (!strncmp("frac12", token, 6)) *to++ = '½';
! else if (!strncmp("frac34", token, 6)) *to++ = '¾';
! else if (!strncmp("iquest", token, 6)) *to++ = '¿';
! else if (!strncmp("iexcl", token, 5)) *to++ = '¡';
! else if (!strncmp("ETH", token, 3)) *to++ = 'Ð';
! else if (!strncmp("eth", token, 3)) *to++ = 'ð';
! else if (!strncmp("THORN", token, 5)) *to++ = 'Þ';
! else if (!strncmp("thorn", token, 5)) *to++ = 'þ';
! else if (!strncmp("AElig", token, 5)) *to++ = 'Æ';
! else if (!strncmp("aelig", token, 5)) *to++ = 'æ';
! else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø';
! else if (!strncmp("curren", token, 6)) *to++ = '¤';
! else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç';
! else if (!strncmp("ccedil", token, 6)) *to++ = 'ç';
! else if (!strncmp("szlig", token, 5)) *to++ = 'ß';
! else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ';
! else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ';
! else if (!strncmp("yen", token, 3)) *to++ = '¥';
! else if (!strncmp("not", token, 3)) *to++ = '¬';
! else if (!strncmp("ordf", token, 4)) *to++ = 'ª';
! else if (!strncmp("uml", token, 3)) *to++ = '¨';
! else if (!strncmp("shy", token, 3)) *to++ = '';
! else if (!strncmp("macr", token, 4)) *to++ = '¯';
! continue;
!
! }
! else if (*from == '>' && !ampersand)
! {
! intoken = false;
! // process desired tokens
! if (!strncmp(token, "sync type=\"Strongs\" value=\"G", 27)) {
! *to++ = '<';
! for (i = 28; token[i] != '\"'; i++)
! *to++ = token[i];
! *to++ = '>';
! continue;
! }
! else if (!strncmp(token, "sync type=\"Strongs\" value=\"H", 27)) {
! *to++ = '<';
! for (i = 28; token[i] != '\"'; i++)
! *to++ = token[i];
! *to++ = '>';
! continue;
! }
! else if (!strncmp(token, "scripRef", 8)) {
! *to++ = '#';
! continue;
! }
! else if (!strncmp(token, "/scripRef", 9)) {
! *to++ = ' ';
! continue;
! }
! else if (!strncmp(token, "note ", 5)) {
! *to++ = '{';
! continue;
! }
! else if (!strncmp(token, "/note", 5)) {
! *to++ = '}';
! continue;
! }
! else if (!strnicmp(token, "font", 4)) {
! *to++ = '\\';
! *to++ = '\\';
! continue;
! }
! else if (!strnicmp(token, "/font", 5)) {
! *to++ = '\\';
! *to++ = '\\';
! continue;
! }
! else switch(*token) {
! case 'I': // font tags
! case 'i':
! *to++ = '\\';
! *to++ = '@';
! continue;
! case 'B': // bold start
! case 'b':
! *to++ = '\\';
! *to++ = '$';
! continue;
! case '/':
! switch(token[1]) {
! case 'I':
! case 'i': // italic end
! *to++ = '\\';
! *to++ = '@';
! continue;
! case 'B': // bold start
! case 'b':
! *to++ = '\\';
! *to++ = '$';
! continue;
! }
! }
! continue;
}
! if (intoken) {
! if (tokpos < 2047)
! token[tokpos++] = *from;
! }
! else *to++ = *from;
! }
! *to++ = 0;
! *to = 0;
! return 0;
}
--- 23,202 ----
ThMLOLB::ThMLOLB()
{
+     // Configure the filter: ThML tags are delimited by '<' ... '>' and
+     // character entities by '&' ... ';'. Every entity / tag registered
+     // below is replaced by the given string; tags that need more than a
+     // fixed replacement are processed in handleToken().
+     setTokenStart("<");
+     setTokenEnd(">");
+
+     setEscapeStart("&");
+     setEscapeEnd(";");
+
+     // Entity names differ only by case (e.g. "Aacute" vs. "aacute").
+     setEscapeStringCaseSensitive(true);
+
+     addEscapeStringSubstitute("nbsp", " ");
+     addEscapeStringSubstitute("apos", "'");
+     addEscapeStringSubstitute("quot", "\"");
+     addEscapeStringSubstitute("amp", "&");
+     addEscapeStringSubstitute("lt", "<");
+     addEscapeStringSubstitute("gt", ">");
+     addEscapeStringSubstitute("brvbar", "|");
+     addEscapeStringSubstitute("sect", "§");
+     addEscapeStringSubstitute("copy", "©");
+     addEscapeStringSubstitute("laquo", "«");
+     addEscapeStringSubstitute("reg", "®");
+     addEscapeStringSubstitute("acute", "´");
+     addEscapeStringSubstitute("para", "¶");
+     addEscapeStringSubstitute("raquo", "»");
+
+     addEscapeStringSubstitute("Aacute", "Á");
+     addEscapeStringSubstitute("Agrave", "À");
+     addEscapeStringSubstitute("Acirc", "Â");
+     addEscapeStringSubstitute("Auml", "Ä");
+     addEscapeStringSubstitute("Atilde", "Ã");
+     addEscapeStringSubstitute("Aring", "Å");
+     addEscapeStringSubstitute("aacute", "á");
+     addEscapeStringSubstitute("agrave", "à");
+     addEscapeStringSubstitute("acirc", "â");
+     addEscapeStringSubstitute("auml", "ä");
+     addEscapeStringSubstitute("atilde", "ã");
+     addEscapeStringSubstitute("aring", "å");
+     addEscapeStringSubstitute("Eacute", "É");
+     addEscapeStringSubstitute("Egrave", "È");
+     addEscapeStringSubstitute("Ecirc", "Ê");
+     addEscapeStringSubstitute("Euml", "Ë");
+     addEscapeStringSubstitute("eacute", "é");
+     addEscapeStringSubstitute("egrave", "è");
+     addEscapeStringSubstitute("ecirc", "ê");
+     addEscapeStringSubstitute("euml", "ë");
+     addEscapeStringSubstitute("Iacute", "Í");
+     addEscapeStringSubstitute("Igrave", "Ì");
+     addEscapeStringSubstitute("Icirc", "Î");
+     addEscapeStringSubstitute("Iuml", "Ï");
+     addEscapeStringSubstitute("iacute", "í");
+     addEscapeStringSubstitute("igrave", "ì");
+     addEscapeStringSubstitute("icirc", "î");
+     addEscapeStringSubstitute("iuml", "ï");
+     addEscapeStringSubstitute("Oacute", "Ó");
+     addEscapeStringSubstitute("Ograve", "Ò");
+     addEscapeStringSubstitute("Ocirc", "Ô");
+     addEscapeStringSubstitute("Ouml", "Ö");
+     addEscapeStringSubstitute("Otilde", "Õ");
+     addEscapeStringSubstitute("oacute", "ó");
+     addEscapeStringSubstitute("ograve", "ò");
+     addEscapeStringSubstitute("ocirc", "ô");
+     addEscapeStringSubstitute("ouml", "ö");
+     addEscapeStringSubstitute("otilde", "õ");
+     addEscapeStringSubstitute("Uacute", "Ú");
+     addEscapeStringSubstitute("Ugrave", "Ù");
+     addEscapeStringSubstitute("Ucirc", "Û");
+     addEscapeStringSubstitute("Uuml", "Ü");
+     addEscapeStringSubstitute("uacute", "ú");
+     addEscapeStringSubstitute("ugrave", "ù");
+     addEscapeStringSubstitute("ucirc", "û");
+     addEscapeStringSubstitute("uuml", "ü");
+     addEscapeStringSubstitute("Yacute", "Ý");
+     addEscapeStringSubstitute("yacute", "ý");
+     addEscapeStringSubstitute("yuml", "ÿ");
+
+     addEscapeStringSubstitute("deg", "°");
+     addEscapeStringSubstitute("plusmn", "±");
+     addEscapeStringSubstitute("sup2", "²");
+     addEscapeStringSubstitute("sup3", "³");
+     addEscapeStringSubstitute("sup1", "¹");
+     // Was registered as a second "nbsp", which collided with the
+     // non-breaking-space entry above; the entity for this character is "ordm".
+     addEscapeStringSubstitute("ordm", "º");
+     addEscapeStringSubstitute("pound", "£");
+     addEscapeStringSubstitute("cent", "¢");
+     addEscapeStringSubstitute("frac14", "¼");
+     addEscapeStringSubstitute("frac12", "½");
+     addEscapeStringSubstitute("frac34", "¾");
+     addEscapeStringSubstitute("iquest", "¿");
+     addEscapeStringSubstitute("iexcl", "¡");
+     addEscapeStringSubstitute("ETH", "Ð");
+     addEscapeStringSubstitute("eth", "ð");
+     addEscapeStringSubstitute("THORN", "Þ");
+     addEscapeStringSubstitute("thorn", "þ");
+     addEscapeStringSubstitute("AElig", "Æ");
+     addEscapeStringSubstitute("aelig", "æ");
+     addEscapeStringSubstitute("Oslash", "Ø");
+     addEscapeStringSubstitute("curren", "¤");
+     addEscapeStringSubstitute("Ccedil", "Ç");
+     addEscapeStringSubstitute("ccedil", "ç");
+     addEscapeStringSubstitute("szlig", "ß");
+     addEscapeStringSubstitute("Ntilde", "Ñ");
+     addEscapeStringSubstitute("ntilde", "ñ");
+     addEscapeStringSubstitute("yen", "¥");
+     addEscapeStringSubstitute("not", "¬");
+     addEscapeStringSubstitute("ordf", "ª");
+     addEscapeStringSubstitute("uml", "¨");
+     // NOTE(review): the replacement shows up empty here; the soft-hyphen
+     // character (0xAD) was presumably lost in transit - confirm against
+     // the repository copy.
+     addEscapeStringSubstitute("shy", "");
+     addEscapeStringSubstitute("macr", "¯");
+
+     setTokenCaseSensitive(true);
+
+     addTokenSubstitute("/note", "}");
+     // Was a second "/note" entry; "\\" is the marker handleToken() emits
+     // for an opening font tag, so this is the matching closing tag.
+     addTokenSubstitute("/font", "\\");
+
+     addTokenSubstitute("br", "\n");
+     addTokenSubstitute("br /", "\n");
+     // NOTE(review): the previous revision emitted "\\@" for both <i> and
+     // </i> and "\\$" for both <b> and </b>; here every opening tag gets
+     // "\\@" and every closing tag "\\$" - confirm which one OLB expects.
+     addTokenSubstitute("i", "\\@");
+     addTokenSubstitute("/i", "\\$");
+     // Stray '{' (the note-opening marker) removed so that "b" agrees with "B".
+     addTokenSubstitute("b", "\\@");
+     addTokenSubstitute("/b", "\\$");
+     addTokenSubstitute("p", "\n");
+
+     //we need uppercase forms for the moment to support a few early ThML modules that aren't XHTML compliant
+     addTokenSubstitute("BR", "\n");
+     addTokenSubstitute("I", "\\@");
+     addTokenSubstitute("/I", "\\$");
+     addTokenSubstitute("B", "\\@");
+     addTokenSubstitute("/B", "\\$");
+     addTokenSubstitute("P", "\n");
}
! // Handles one ThML token (the text found between '<' and '>').
! //
! // The fixed substitutions registered in the constructor are tried first via
! // substituteToken(); only when none applies is the token inspected here.
! //
! //   buf      - output buffer the converted text is appended to
! //   token    - NUL-terminated token text without the surrounding delimiters
! //   userData - not used by this filter
! //
! // Returns true when the token was consumed (even if nothing was appended),
! // false when it was not recognised.
! bool ThMLOLB::handleToken(SWBuf &buf, const char *token, DualStringMap &userData) {
!     if (!substituteToken(buf, token)) {
!         // manually process if it wasn't a simple substitution
!         if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) {
!             // token[27] is the letter that prefixes the number; it is
!             // dropped and only the text up to the closing quote is copied.
!             // Any other letter is swallowed without output.
!             if (token[27] == 'H' || token[27] == 'G' || token[27] == 'A' || token[27] == 'T') {
!                 buf += "<";
!                 // Also stop at the terminator so that a token with no
!                 // closing quote cannot be read past its end.
!                 for (unsigned int i = 28; token[i] && token[i] != '\"'; i++)
!                     buf += token[i];
!                 buf += ">";
!             }
!         }
!         else if (!strncmp(token, "sync type=\"morph\" ", 18)) {
!             buf += "{";
!             // Locate the value="..." attribute and copy its contents.
!             for (const char *tok = token + 5; *tok; tok++) {
!                 if (!strncmp(tok, "value=\"", 7)) {
!                     tok += 7;
!                     // Bounded by the terminator as well as the closing quote.
!                     for (; *tok && *tok != '\"'; tok++)
!                         buf += *tok;
!                     break;
!                 }
!             }
!             buf += "}";
!         }
!         else if (!strncmp(token, "scripRef", 8)) {
!             buf += "#";
!         }
!         else if (!strncmp(token, "/scripRef", 9)) {
!             buf += " ";
!         }
!         // Compare all four characters; a length of 3 matched any "fon..." tag.
!         else if (!strncmp(token, "font", 4)) {
!             buf += "\\";
!         }
!         else if (!strncmp(token, "note", 4)) {
!             buf += "{";
!         }
!
!         else {
!             return false; // we still didn't handle token
!         }
}
!     return true;
}