/****************************************************************************** * * thmlosis.cpp - filter to convert ThML to OSIS * * $Id$ * * Copyright 2002-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society * P. O. Box 2528 * Tempe, AZ 85280-2528 * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation version 2. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * */ #include #include #include #include #include #include #include #include #include #include SWORD_NAMESPACE_START ThMLOSIS::ThMLOSIS() { } ThMLOSIS::~ThMLOSIS() { } char ThMLOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) { char token[2048]; // cheese. Fix. int tokpos = 0; bool intoken = false; bool keepToken = false; bool ampersand = false; // static QuoteStack quoteStack; bool lastspace = false; char val[128]; SWBuf buf; char *valto; char *ch; const char *wordStart = text.c_str(); const char *wordEnd = NULL; const char *textStart = NULL; const char *textEnd = NULL; bool suspendTextPassThru = false; bool handled = false; bool newText = false; bool newWord = false; // SWBuf tmp; SWBuf divEnd = ""; SWBuf orig = text; const char* from = orig.c_str(); text = ""; for (from = orig.c_str(); *from; ++from) { // handle silly items in greek whnu, remove when module is fixed if ((*from == '<') && (*(from+1) < 0)) { text += "<"; continue; } if (*from == '<') { //start of new token detected intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; ampersand = false; textEnd = from-1; wordEnd = text.c_str() + text.length();//not good, instead of wordEnd = to! // wordEnd = to; continue; } if (*from == '&') { intoken = true; tokpos = 0; token[0] = 0; token[1] = 0; token[2] = 0; ampersand = true; continue; } if (*from == ';' && ampersand) { intoken = false; ampersand = false; if (*token == '#') { text += '&'; text += token; text += ';'; } else if (!strncmp("nbsp", token, 4)) text += ' '; else if (!strncmp("quot", token, 4)) text += '"'; else if (!strncmp("amp", token, 3)) text += '&'; else if (!strncmp("lt", token, 2)) text += '<'; else if (!strncmp("gt", token, 2)) text += '>'; else if (!strncmp("brvbar", token, 6)) text += '¦'; else if (!strncmp("sect", token, 4)) text += '§'; else if (!strncmp("copy", token, 4)) text += '©'; else if (!strncmp("laquo", token, 5)) text += '«'; else if (!strncmp("reg", token, 3)) text += '®'; else if (!strncmp("acute", token, 5)) text += '´'; else if (!strncmp("para", token, 4)) text += '¶'; else if (!strncmp("raquo", token, 5)) text += '»'; else if (!strncmp("Aacute", token, 6)) text += 'Á'; else if (!strncmp("Agrave", token, 6)) text += 'À'; else if (!strncmp("Acirc", token, 5)) text += 'Â'; else if (!strncmp("Auml", token, 4)) text += 'Ä'; else if (!strncmp("Atilde", token, 6)) text += 'Ã'; else if (!strncmp("Aring", token, 5)) text += 'Å'; else if (!strncmp("aacute", token, 6)) text += 'á'; else if (!strncmp("agrave", token, 6)) text += 'à'; else if (!strncmp("acirc", token, 5)) text += 'â'; else if (!strncmp("auml", token, 4)) text += 'ä'; else if (!strncmp("atilde", token, 6)) text += 'ã'; else if (!strncmp("aring", token, 5)) text += 'å'; else if (!strncmp("Eacute", token, 6)) text += 'É'; else if (!strncmp("Egrave", token, 6)) text += 'È'; else if (!strncmp("Ecirc", token, 5)) text += 'Ê'; else if (!strncmp("Euml", token, 4)) text += 'Ë'; else if (!strncmp("eacute", token, 6)) text += 'é'; else if (!strncmp("egrave", token, 6)) text += 'è'; else if (!strncmp("ecirc", token, 5)) text += 'ê'; else if (!strncmp("euml", token, 4)) text += 'ë'; else if (!strncmp("Iacute", token, 6)) text += 'Í'; else if (!strncmp("Igrave", token, 6)) text += 'Ì'; else if (!strncmp("Icirc", token, 5)) text += 'Î'; else if (!strncmp("Iuml", token, 4)) text += 'Ï'; else if (!strncmp("iacute", token, 6)) text += 'í'; else if (!strncmp("igrave", token, 6)) text += 'ì'; else if (!strncmp("icirc", token, 5)) text += 'î'; else if (!strncmp("iuml", token, 4)) text += 'ï'; else if (!strncmp("Oacute", token, 6)) text += 'Ó'; else if (!strncmp("Ograve", token, 6)) text += 'Ò'; else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; else if (!strncmp("Ouml", token, 4)) text += 'Ö'; else if (!strncmp("Otilde", token, 6)) text += 'Õ'; else if (!strncmp("oacute", token, 6)) text += 'ó'; else if (!strncmp("ograve", token, 6)) text += 'ò'; else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += ">"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = ""; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, ""); text.append("<title>"); divEnd = ""; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, ""); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, ""); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, ""); text.append(""); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, ""); text.append(""); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, ""); text.append(""); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, ""); text.append(""); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, ""); text.append(""); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, ""); text.append(""); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "
getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, " attribute! buf = ""; buf.appendFormatted("", val); text.insert(wordStart - text.c_str(), buf); text += ""; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, " attribute fond buf = ""; buf.appendFormatted("", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += ""; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : ""); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += ""; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t
", vkey->getOSISRef()); // } // else sprintf(ref, "\t
", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END