The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
thmlosis.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * thmlosis.cpp - filter to convert ThML to OSIS
4  *
5  * $Id: thmlosis.cpp 3808 2020-10-02 13:23:34Z scribe $
6  *
7  * Copyright 2002-2013 CrossWire Bible Society (http://www.crosswire.org)
8  * CrossWire Bible Society
9  * P. O. Box 2528
10  * Tempe, AZ 85280-2528
11  *
12  * This program is free software; you can redistribute it and/or modify it
13  * under the terms of the GNU General Public License as published by the
14  * Free Software Foundation version 2.
15  *
16  * This program is distributed in the hope that it will be useful, but
17  * WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * General Public License for more details.
20  *
21  */
22 
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <stdarg.h>
26 #include <ctype.h>
27 #include <thmlosis.h>
28 #include <swmodule.h>
29 #include <swlog.h>
30 #include <versekey.h>
31 #include <utilstr.h>
32 #include <utilxml.h>
33 
34 
36 
38 }
39 
40 
42 }
43 
44 
45 char ThMLOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
46  char token[2048]; // cheese. Fix.
47  int tokpos = 0;
48  bool intoken = false;
49  bool keepToken = false;
50  bool ampersand = false;
51 
52 // static QuoteStack quoteStack;
53 
54  bool lastspace = false;
55  char val[128];
56  SWBuf buf;
57  char *valto;
58  char *ch;
59 
60  const char *wordStart = text.c_str();
61  const char *wordEnd = NULL;
62 
63  const char *textStart = NULL;
64  const char *textEnd = NULL;
65 
66  bool suspendTextPassThru = false;
67  bool handled = false;
68  bool newText = false;
69  bool newWord = false;
70 
71 // SWBuf tmp;
72  SWBuf divEnd = "";
73 
74  SWBuf orig = text;
75  const char* from = orig.c_str();
76 
77  text = "";
78  for (from = orig.c_str(); *from; ++from) {
79 
80  // handle silly <variant word> items in greek whnu, remove when module is fixed
81  if ((*from == '<') && (*(from+1) < 0)) {
82  text += "&lt;";
83  continue;
84  }
85 
86  if (*from == '<') { //start of new token detected
87  intoken = true;
88  tokpos = 0;
89  token[0] = 0;
90  token[1] = 0;
91  token[2] = 0;
92  ampersand = false;
93  textEnd = from-1;
94  wordEnd = text.c_str() + text.length();//not good, instead of wordEnd = to!
95 
96 // wordEnd = to;
97  continue;
98  }
99 
100  if (*from == '&') {
101  intoken = true;
102  tokpos = 0;
103  token[0] = 0;
104  token[1] = 0;
105  token[2] = 0;
106  ampersand = true;
107  continue;
108  }
109 
110  if (*from == ';' && ampersand) {
111  intoken = false;
112  ampersand = false;
113 
114  if (*token == '#') {
115  text += '&';
116  text += token;
117  text += ';';
118  }
119  else if (!strncmp("nbsp", token, 4)) text += ' ';
120  else if (!strncmp("quot", token, 4)) text += '"';
121  else if (!strncmp("amp", token, 3)) text += '&';
122  else if (!strncmp("lt", token, 2)) text += '<';
123  else if (!strncmp("gt", token, 2)) text += '>';
124  else if (!strncmp("brvbar", token, 6)) text += '¦';
125  else if (!strncmp("sect", token, 4)) text += '§';
126  else if (!strncmp("copy", token, 4)) text += '©';
127  else if (!strncmp("laquo", token, 5)) text += '«';
128  else if (!strncmp("reg", token, 3)) text += '®';
129  else if (!strncmp("acute", token, 5)) text += '´';
130  else if (!strncmp("para", token, 4)) text += '¶';
131  else if (!strncmp("raquo", token, 5)) text += '»';
132  else if (!strncmp("Aacute", token, 6)) text += 'Á';
133  else if (!strncmp("Agrave", token, 6)) text += 'À';
134  else if (!strncmp("Acirc", token, 5)) text += 'Â';
135  else if (!strncmp("Auml", token, 4)) text += 'Ä';
136  else if (!strncmp("Atilde", token, 6)) text += 'Ã';
137  else if (!strncmp("Aring", token, 5)) text += 'Å';
138  else if (!strncmp("aacute", token, 6)) text += 'á' else if (!strncmp("agrave", token, 6)) text += 'à'; else if (!strncmp("acirc", token, 5)) text += 'â'; else if (!strncmp("auml", token, 4)) text += 'ä'; else if (!strncmp("atilde", token, 6)) text += 'ã'; else if (!strncmp("aring", token, 5)) text += 'å'; else if (!strncmp("Eacute", token, 6)) text += 'É'; else if (!strncmp("Egrave", token, 6)) text += 'È'; else if (!strncmp("Ecirc", token, 5)) text += 'Ê'; else if (!strncmp("Euml", token, 4)) text += 'Ë'; else if (!strncmp("eacute", token, 6)) text += 'é'; else if (!strncmp("egrave", token, 6)) text += 'è'; else if (!strncmp("ecirc", token, 5)) text += 'ê'; else if (!strncmp("euml", token, 4)) text += 'ë'; else if (!strncmp("Iacute", token, 6)) text += 'Í'; else if (!strncmp("Igrave", token, 6)) text += 'Ì'; else if (!strncmp("Icirc", token, 5)) text += 'Î'; else if (!strncmp("Iuml", token, 4)) text += 'Ï'; else if (!strncmp("iacute", token, 6)) text += 'í'; else if (!strncmp("igrave", token, 6)) text += 'ì'; else if (!strncmp("icirc", token, 5)) text += 'î'; else if (!strncmp("iuml", token, 4)) text += 'ï'; else if (!strncmp("Oacute", token, 6)) text += 'Ó'; else if (!strncmp("Ograve", token, 6)) text += 'Ò'; else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; else if (!strncmp("Ouml", token, 4)) text += 'Ö'; else if (!strncmp("Otilde", token, 6)) text += 'Õ'; else if (!strncmp("oacute", token, 6)) text += 'ó'; else if (!strncmp("ograve", token, 6)) text += 'ò'; else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text 
+= 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if 
(!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of 
italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for 
(c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), 
buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? 
//memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
139  else if (!strncmp("agrave", token, 6)) text += 'à' else if (!strncmp("acirc", token, 5)) text += 'â'; else if (!strncmp("auml", token, 4)) text += 'ä'; else if (!strncmp("atilde", token, 6)) text += 'ã'; else if (!strncmp("aring", token, 5)) text += 'å'; else if (!strncmp("Eacute", token, 6)) text += 'É'; else if (!strncmp("Egrave", token, 6)) text += 'È'; else if (!strncmp("Ecirc", token, 5)) text += 'Ê'; else if (!strncmp("Euml", token, 4)) text += 'Ë'; else if (!strncmp("eacute", token, 6)) text += 'é'; else if (!strncmp("egrave", token, 6)) text += 'è'; else if (!strncmp("ecirc", token, 5)) text += 'ê'; else if (!strncmp("euml", token, 4)) text += 'ë'; else if (!strncmp("Iacute", token, 6)) text += 'Í'; else if (!strncmp("Igrave", token, 6)) text += 'Ì'; else if (!strncmp("Icirc", token, 5)) text += 'Î'; else if (!strncmp("Iuml", token, 4)) text += 'Ï'; else if (!strncmp("iacute", token, 6)) text += 'í'; else if (!strncmp("igrave", token, 6)) text += 'ì'; else if (!strncmp("icirc", token, 5)) text += 'î'; else if (!strncmp("iuml", token, 4)) text += 'ï'; else if (!strncmp("Oacute", token, 6)) text += 'Ó'; else if (!strncmp("Ograve", token, 6)) text += 'Ò'; else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; else if (!strncmp("Ouml", token, 4)) text += 'Ö'; else if (!strncmp("Otilde", token, 6)) text += 'Õ'; else if (!strncmp("oacute", token, 6)) text += 'ó'; else if (!strncmp("ograve", token, 6)) text += 'ò'; else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 
'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if 
(!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical 
Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; 
//pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing 
<w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? 
//memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
140  else if (!strncmp("acirc", token, 5)) text += 'â' else if (!strncmp("auml", token, 4)) text += 'ä'; else if (!strncmp("atilde", token, 6)) text += 'ã'; else if (!strncmp("aring", token, 5)) text += 'å'; else if (!strncmp("Eacute", token, 6)) text += 'É'; else if (!strncmp("Egrave", token, 6)) text += 'È'; else if (!strncmp("Ecirc", token, 5)) text += 'Ê'; else if (!strncmp("Euml", token, 4)) text += 'Ë'; else if (!strncmp("eacute", token, 6)) text += 'é'; else if (!strncmp("egrave", token, 6)) text += 'è'; else if (!strncmp("ecirc", token, 5)) text += 'ê'; else if (!strncmp("euml", token, 4)) text += 'ë'; else if (!strncmp("Iacute", token, 6)) text += 'Í'; else if (!strncmp("Igrave", token, 6)) text += 'Ì'; else if (!strncmp("Icirc", token, 5)) text += 'Î'; else if (!strncmp("Iuml", token, 4)) text += 'Ï'; else if (!strncmp("iacute", token, 6)) text += 'í'; else if (!strncmp("igrave", token, 6)) text += 'ì'; else if (!strncmp("icirc", token, 5)) text += 'î'; else if (!strncmp("iuml", token, 4)) text += 'ï'; else if (!strncmp("Oacute", token, 6)) text += 'Ó'; else if (!strncmp("Ograve", token, 6)) text += 'Ò'; else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; else if (!strncmp("Ouml", token, 4)) text += 'Ö'; else if (!strncmp("Otilde", token, 6)) text += 'Õ'; else if (!strncmp("oacute", token, 6)) text += 'ó'; else if (!strncmp("ograve", token, 6)) text += 'ò'; else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 
'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if 
(!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for 
Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = 
true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w 
morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? 
//memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
141  else if (!strncmp("auml", token, 4)) text += 'ä' else if (!strncmp("atilde", token, 6)) text += 'ã'; else if (!strncmp("aring", token, 5)) text += 'å'; else if (!strncmp("Eacute", token, 6)) text += 'É'; else if (!strncmp("Egrave", token, 6)) text += 'È'; else if (!strncmp("Ecirc", token, 5)) text += 'Ê'; else if (!strncmp("Euml", token, 4)) text += 'Ë'; else if (!strncmp("eacute", token, 6)) text += 'é'; else if (!strncmp("egrave", token, 6)) text += 'è'; else if (!strncmp("ecirc", token, 5)) text += 'ê'; else if (!strncmp("euml", token, 4)) text += 'ë'; else if (!strncmp("Iacute", token, 6)) text += 'Í'; else if (!strncmp("Igrave", token, 6)) text += 'Ì'; else if (!strncmp("Icirc", token, 5)) text += 'Î'; else if (!strncmp("Iuml", token, 4)) text += 'Ï'; else if (!strncmp("iacute", token, 6)) text += 'í'; else if (!strncmp("igrave", token, 6)) text += 'ì'; else if (!strncmp("icirc", token, 5)) text += 'î'; else if (!strncmp("iuml", token, 4)) text += 'ï'; else if (!strncmp("Oacute", token, 6)) text += 'Ó'; else if (!strncmp("Ograve", token, 6)) text += 'Ò'; else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; else if (!strncmp("Ouml", token, 4)) text += 'Ö'; else if (!strncmp("Otilde", token, 6)) text += 'Õ'; else if (!strncmp("oacute", token, 6)) text += 'ó'; else if (!strncmp("ograve", token, 6)) text += 'ò'; else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 
'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if 
(!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // 
pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if 
(!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", 
((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? 
//memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
142  else if (!strncmp("atilde", token, 6)) text += 'ã' else if (!strncmp("aring", token, 5)) text += 'å'; else if (!strncmp("Eacute", token, 6)) text += 'É'; else if (!strncmp("Egrave", token, 6)) text += 'È'; else if (!strncmp("Ecirc", token, 5)) text += 'Ê'; else if (!strncmp("Euml", token, 4)) text += 'Ë'; else if (!strncmp("eacute", token, 6)) text += 'é'; else if (!strncmp("egrave", token, 6)) text += 'è'; else if (!strncmp("ecirc", token, 5)) text += 'ê'; else if (!strncmp("euml", token, 4)) text += 'ë'; else if (!strncmp("Iacute", token, 6)) text += 'Í'; else if (!strncmp("Igrave", token, 6)) text += 'Ì'; else if (!strncmp("Icirc", token, 5)) text += 'Î'; else if (!strncmp("Iuml", token, 4)) text += 'Ï'; else if (!strncmp("iacute", token, 6)) text += 'í'; else if (!strncmp("igrave", token, 6)) text += 'ì'; else if (!strncmp("icirc", token, 5)) text += 'î'; else if (!strncmp("iuml", token, 4)) text += 'ï'; else if (!strncmp("Oacute", token, 6)) text += 'Ó'; else if (!strncmp("Ograve", token, 6)) text += 'Ò'; else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; else if (!strncmp("Ouml", token, 4)) text += 'Ö'; else if (!strncmp("Otilde", token, 6)) text += 'Õ'; else if (!strncmp("oacute", token, 6)) text += 'ó'; else if (!strncmp("ograve", token, 6)) text += 'ò'; else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 
'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // 
handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = 
true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for 
(unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); 
text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else 
sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
143  else if (!strncmp("aring", token, 5)) text += 'å' else if (!strncmp("Eacute", token, 6)) text += 'É'; else if (!strncmp("Egrave", token, 6)) text += 'È'; else if (!strncmp("Ecirc", token, 5)) text += 'Ê'; else if (!strncmp("Euml", token, 4)) text += 'Ë'; else if (!strncmp("eacute", token, 6)) text += 'é'; else if (!strncmp("egrave", token, 6)) text += 'è'; else if (!strncmp("ecirc", token, 5)) text += 'ê'; else if (!strncmp("euml", token, 4)) text += 'ë'; else if (!strncmp("Iacute", token, 6)) text += 'Í'; else if (!strncmp("Igrave", token, 6)) text += 'Ì'; else if (!strncmp("Icirc", token, 5)) text += 'Î'; else if (!strncmp("Iuml", token, 4)) text += 'Ï'; else if (!strncmp("iacute", token, 6)) text += 'í'; else if (!strncmp("igrave", token, 6)) text += 'ì'; else if (!strncmp("icirc", token, 5)) text += 'î'; else if (!strncmp("iuml", token, 4)) text += 'ï'; else if (!strncmp("Oacute", token, 6)) text += 'Ó'; else if (!strncmp("Ograve", token, 6)) text += 'Ò'; else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; else if (!strncmp("Ouml", token, 4)) text += 'Ö'; else if (!strncmp("Otilde", token, 6)) text += 'Õ'; else if (!strncmp("oacute", token, 6)) text += 'ó'; else if (!strncmp("ograve", token, 6)) text += 'ò'; else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += 
'°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, 
remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } 
// else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; 
i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; 
} if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", 
vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
144  else if (!strncmp("Eacute", token, 6)) text += 'É';
145  else if (!strncmp("Egrave", token, 6)) text += 'È';
146  else if (!strncmp("Ecirc", token, 5)) text += 'Ê';
147  else if (!strncmp("Euml", token, 4)) text += 'Ë';
148  else if (!strncmp("eacute", token, 6)) text += 'é' else if (!strncmp("egrave", token, 6)) text += 'è'; else if (!strncmp("ecirc", token, 5)) text += 'ê'; else if (!strncmp("euml", token, 4)) text += 'ë'; else if (!strncmp("Iacute", token, 6)) text += 'Í'; else if (!strncmp("Igrave", token, 6)) text += 'Ì'; else if (!strncmp("Icirc", token, 5)) text += 'Î'; else if (!strncmp("Iuml", token, 4)) text += 'Ï'; else if (!strncmp("iacute", token, 6)) text += 'í'; else if (!strncmp("igrave", token, 6)) text += 'ì'; else if (!strncmp("icirc", token, 5)) text += 'î'; else if (!strncmp("iuml", token, 4)) text += 'ï'; else if (!strncmp("Oacute", token, 6)) text += 'Ó'; else if (!strncmp("Ograve", token, 6)) text += 'Ò'; else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; else if (!strncmp("Ouml", token, 4)) text += 'Ö'; else if (!strncmp("Otilde", token, 6)) text += 'Õ'; else if (!strncmp("oacute", token, 6)) text += 'ó'; else if (!strncmp("ograve", token, 6)) text += 'ò'; else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; 
else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < 
(text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // 
pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing 
morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
149  else if (!strncmp("egrave", token, 6)) text += 'è' else if (!strncmp("ecirc", token, 5)) text += 'ê'; else if (!strncmp("euml", token, 4)) text += 'ë'; else if (!strncmp("Iacute", token, 6)) text += 'Í'; else if (!strncmp("Igrave", token, 6)) text += 'Ì'; else if (!strncmp("Icirc", token, 5)) text += 'Î'; else if (!strncmp("Iuml", token, 4)) text += 'Ï'; else if (!strncmp("iacute", token, 6)) text += 'í'; else if (!strncmp("igrave", token, 6)) text += 'ì'; else if (!strncmp("icirc", token, 5)) text += 'î'; else if (!strncmp("iuml", token, 4)) text += 'ï'; else if (!strncmp("Oacute", token, 6)) text += 'Ó'; else if (!strncmp("Ograve", token, 6)) text += 'Ò'; else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; else if (!strncmp("Ouml", token, 4)) text += 'Ö'; else if (!strncmp("Otilde", token, 6)) text += 'Õ'; else if (!strncmp("oacute", token, 6)) text += 'ó'; else if (!strncmp("ograve", token, 6)) text += 'ò'; else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; 
else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi 
type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; 
buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
150  else if (!strncmp("ecirc", token, 5)) text += 'ê' else if (!strncmp("euml", token, 4)) text += 'ë'; else if (!strncmp("Iacute", token, 6)) text += 'Í'; else if (!strncmp("Igrave", token, 6)) text += 'Ì'; else if (!strncmp("Icirc", token, 5)) text += 'Î'; else if (!strncmp("Iuml", token, 4)) text += 'Ï'; else if (!strncmp("iacute", token, 6)) text += 'í'; else if (!strncmp("igrave", token, 6)) text += 'ì'; else if (!strncmp("icirc", token, 5)) text += 'î'; else if (!strncmp("iuml", token, 4)) text += 'ï'; else if (!strncmp("Oacute", token, 6)) text += 'Ó'; else if (!strncmp("Ograve", token, 6)) text += 'Ò'; else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; else if (!strncmp("Ouml", token, 4)) text += 'Ö'; else if (!strncmp("Otilde", token, 6)) text += 'Õ'; else if (!strncmp("oacute", token, 6)) text += 'ó'; else if (!strncmp("ograve", token, 6)) text += 'ò'; else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else 
if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi 
type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; 
buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
151  else if (!strncmp("euml", token, 4)) text += 'ë' else if (!strncmp("Iacute", token, 6)) text += 'Í'; else if (!strncmp("Igrave", token, 6)) text += 'Ì'; else if (!strncmp("Icirc", token, 5)) text += 'Î'; else if (!strncmp("Iuml", token, 4)) text += 'Ï'; else if (!strncmp("iacute", token, 6)) text += 'í'; else if (!strncmp("igrave", token, 6)) text += 'ì'; else if (!strncmp("icirc", token, 5)) text += 'î'; else if (!strncmp("iuml", token, 4)) text += 'ï'; else if (!strncmp("Oacute", token, 6)) text += 'Ó'; else if (!strncmp("Ograve", token, 6)) text += 'Ò'; else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; else if (!strncmp("Ouml", token, 4)) text += 'Ö'; else if (!strncmp("Otilde", token, 6)) text += 'Õ'; else if (!strncmp("oacute", token, 6)) text += 'ó'; else if (!strncmp("ograve", token, 6)) text += 'ò'; else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; 
else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi 
type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; 
buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
152  else if (!strncmp("Iacute", token, 6)) text += 'Í';
153  else if (!strncmp("Igrave", token, 6)) text += 'Ì';
154  else if (!strncmp("Icirc", token, 5)) text += 'Î';
155  else if (!strncmp("Iuml", token, 4)) text += 'Ï';
156  else if (!strncmp("iacute", token, 6)) text += 'í' else if (!strncmp("igrave", token, 6)) text += 'ì'; else if (!strncmp("icirc", token, 5)) text += 'î'; else if (!strncmp("iuml", token, 4)) text += 'ï'; else if (!strncmp("Oacute", token, 6)) text += 'Ó'; else if (!strncmp("Ograve", token, 6)) text += 'Ò'; else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; else if (!strncmp("Ouml", token, 4)) text += 'Ö'; else if (!strncmp("Otilde", token, 6)) text += 'Õ'; else if (!strncmp("oacute", token, 6)) text += 'ó'; else if (!strncmp("ograve", token, 6)) text += 'ò'; else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; 
else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi 
type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; 
buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
157  else if (!strncmp("igrave", token, 6)) text += 'ì' else if (!strncmp("icirc", token, 5)) text += 'î'; else if (!strncmp("iuml", token, 4)) text += 'ï'; else if (!strncmp("Oacute", token, 6)) text += 'Ó'; else if (!strncmp("Ograve", token, 6)) text += 'Ò'; else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; else if (!strncmp("Ouml", token, 4)) text += 'Ö'; else if (!strncmp("Otilde", token, 6)) text += 'Õ'; else if (!strncmp("oacute", token, 6)) text += 'ó'; else if (!strncmp("ograve", token, 6)) text += 'ò'; else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else 
if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi 
type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; 
buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
158  else if (!strncmp("icirc", token, 5)) text += 'î' else if (!strncmp("iuml", token, 4)) text += 'ï'; else if (!strncmp("Oacute", token, 6)) text += 'Ó'; else if (!strncmp("Ograve", token, 6)) text += 'Ò'; else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; else if (!strncmp("Ouml", token, 4)) text += 'Ö'; else if (!strncmp("Otilde", token, 6)) text += 'Õ'; else if (!strncmp("oacute", token, 6)) text += 'ó'; else if (!strncmp("ograve", token, 6)) text += 'ò'; else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else 
if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi 
type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; 
buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
159  else if (!strncmp("iuml", token, 4)) text += 'ï' else if (!strncmp("Oacute", token, 6)) text += 'Ó'; else if (!strncmp("Ograve", token, 6)) text += 'Ò'; else if (!strncmp("Ocirc", token, 5)) text += 'Ô'; else if (!strncmp("Ouml", token, 4)) text += 'Ö'; else if (!strncmp("Otilde", token, 6)) text += 'Õ'; else if (!strncmp("oacute", token, 6)) text += 'ó'; else if (!strncmp("ograve", token, 6)) text += 'ò'; else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else 
if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi 
type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; 
buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
160  else if (!strncmp("Oacute", token, 6)) text += 'Ó';
161  else if (!strncmp("Ograve", token, 6)) text += 'Ò';
162  else if (!strncmp("Ocirc", token, 5)) text += 'Ô';
163  else if (!strncmp("Ouml", token, 4)) text += 'Ö';
164  else if (!strncmp("Otilde", token, 6)) text += 'Õ';
165  else if (!strncmp("oacute", token, 6)) text += 'ó' else if (!strncmp("ograve", token, 6)) text += 'ò'; else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; 
else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi 
type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; 
buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
166  else if (!strncmp("ograve", token, 6)) text += 'ò' else if (!strncmp("ocirc", token, 5)) text += 'ô'; else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else 
if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi 
type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; 
buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
167  else if (!strncmp("ocirc", token, 5)) text += 'ô' else if (!strncmp("ouml", token, 4)) text += 'ö'; else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else 
if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi 
type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; 
buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
168  else if (!strncmp("ouml", token, 4)) text += 'ö' else if (!strncmp("otilde", token, 6)) text += 'õ'; else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; 
else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi 
type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; 
buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
169  else if (!strncmp("otilde", token, 6)) text += 'õ' else if (!strncmp("Uacute", token, 6)) text += 'Ú'; else if (!strncmp("Ugrave", token, 6)) text += 'Ù'; else if (!strncmp("Ucirc", token, 5)) text += 'Û'; else if (!strncmp("Uuml", token, 4)) text += 'Ü'; else if (!strncmp("uacute", token, 6)) text += 'ú'; else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else 
if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if 
(!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search 
behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! 
buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
170  else if (!strncmp("Uacute", token, 6)) text += 'Ú';
171  else if (!strncmp("Ugrave", token, 6)) text += 'Ù';
172  else if (!strncmp("Ucirc", token, 5)) text += 'Û';
173  else if (!strncmp("Uuml", token, 4)) text += 'Ü';
174  else if (!strncmp("uacute", token, 6)) text += 'ú' else if (!strncmp("ugrave", token, 6)) text += 'ù'; else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if 
(!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } 
// Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, 
module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; 
buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? 
//memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
175  else if (!strncmp("ugrave", token, 6)) text += 'ù' else if (!strncmp("ucirc", token, 5)) text += 'û'; else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if 
(!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't 
domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if 
(*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", 
((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? 
//memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
176  else if (!strncmp("ucirc", token, 5)) text += 'û' else if (!strncmp("uuml", token, 4)) text += 'ü'; else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if 
(!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, 
assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of 
uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); 
} text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? 
//memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
177  else if (!strncmp("uuml", token, 4)) text += 'ü' else if (!strncmp("Yacute", token, 6)) text += 'Ý'; else if (!strncmp("yacute", token, 6)) text += 'ý'; else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if 
(!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of 
italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for 
(c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), 
buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? 
//memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
178  else if (!strncmp("Yacute", token, 6)) text += 'Ý';
179  else if (!strncmp("yacute", token, 6)) text += 'ý' else if (!strncmp("yuml", token, 4)) text += 'ÿ'; else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if 
(!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for 
Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = 
true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w 
morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? 
//memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
180  else if (!strncmp("yuml", token, 4)) text += 'ÿ' else if (!strncmp("deg", token, 3)) text += '°'; else if (!strncmp("plusmn", token, 6)) text += '±'; else if (!strncmp("sup2", token, 4)) text += '²'; else if (!strncmp("sup3", token, 4)) text += '³'; else if (!strncmp("sup1", token, 4)) text += '¹'; else if (!strncmp("nbsp", token, 4)) text += 'º'; else if (!strncmp("pound", token, 5)) text += '£'; else if (!strncmp("cent", token, 4)) text += '¢'; else if (!strncmp("frac14", token, 6)) text += '¼'; else if (!strncmp("frac12", token, 6)) text += '½'; else if (!strncmp("frac34", token, 6)) text += '¾'; else if (!strncmp("iquest", token, 6)) text += '¿'; else if (!strncmp("iexcl", token, 5)) text += '¡'; else if (!strncmp("ETH", token, 3)) text += 'Ð'; else if (!strncmp("eth", token, 3)) text += 'ð'; else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if 
(!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // 
pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if 
(!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", 
((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? 
//memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
181 
182  else if (!strncmp("deg", token, 3)) text += '°';
183  else if (!strncmp("plusmn", token, 6)) text += '±';
184  else if (!strncmp("sup2", token, 4)) text += '²';
185  else if (!strncmp("sup3", token, 4)) text += '³';
186  else if (!strncmp("sup1", token, 4)) text += '¹';
187  else if (!strncmp("nbsp", token, 4)) text += 'º';
188  else if (!strncmp("pound", token, 5)) text += '£';
189  else if (!strncmp("cent", token, 4)) text += '¢';
190  else if (!strncmp("frac14", token, 6)) text += '¼';
191  else if (!strncmp("frac12", token, 6)) text += '½';
192  else if (!strncmp("frac34", token, 6)) text += '¾';
193  else if (!strncmp("iquest", token, 6)) text += '¿';
194  else if (!strncmp("iexcl", token, 5)) text += '¡';
195  else if (!strncmp("ETH", token, 3)) text += 'Ð';
196  else if (!strncmp("eth", token, 3)) text += 'ð' else if (!strncmp("THORN", token, 5)) text += 'Þ'; else if (!strncmp("thorn", token, 5)) text += 'þ'; else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi 
type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; 
buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
197  else if (!strncmp("THORN", token, 5)) text += 'Þ';
198  else if (!strncmp("thorn", token, 5)) text += 'þ' else if (!strncmp("AElig", token, 5)) text += 'Æ'; else if (!strncmp("aelig", token, 5)) text += 'æ'; else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi 
type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; 
buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
199  else if (!strncmp("AElig", token, 5)) text += 'Æ';
200  else if (!strncmp("aelig", token, 5)) text += 'æ' else if (!strncmp("Oslash", token, 6)) text += 'Ø'; else if (!strncmp("curren", token, 6)) text += '¤'; else if (!strncmp("Ccedil", token, 6)) text += 'Ç'; else if (!strncmp("ccedil", token, 6)) text += 'ç'; else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi 
type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; 
buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
201  else if (!strncmp("Oslash", token, 6)) text += 'Ø';
202  else if (!strncmp("curren", token, 6)) text += '¤';
203  else if (!strncmp("Ccedil", token, 6)) text += 'Ç';
204  else if (!strncmp("ccedil", token, 6)) text += 'ç' else if (!strncmp("szlig", token, 5)) text += 'ß'; else if (!strncmp("Ntilde", token, 6)) text += 'Ñ'; else if (!strncmp("ntilde", token, 6)) text += 'ñ'; else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi 
type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; 
buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
205  else if (!strncmp("szlig", token, 5)) text += 'ß';
206  else if (!strncmp("Ntilde", token, 6)) text += 'Ñ';
207  else if (!strncmp("ntilde", token, 6)) text += 'ñ' else if (!strncmp("yen", token, 3)) text += '¥'; else if (!strncmp("not", token, 3)) text += '¬'; else if (!strncmp("ordf", token, 4)) text += 'ª'; else if (!strncmp("uml", token, 3)) text += '¨'; else if (!strncmp("shy", token, 3)) text += '­'; else if (!strncmp("macr", token, 4)) text += '¯'; else if (!strncmp("micro", token, 5)) text += "µ"; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. 
:?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi 
type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; 
buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ;
208  else if (!strncmp("yen", token, 3)) text += '¥';
209  else if (!strncmp("not", token, 3)) text += '¬';
210  else if (!strncmp("ordf", token, 4)) text += 'ª';
211  else if (!strncmp("uml", token, 3)) text += '¨';
212  else if (!strncmp("shy", token, 3)) text += '­';
213  else if (!strncmp("macr", token, 4)) text += '¯';
214  else if (!strncmp("micro", token, 5)) text += "µ; else if (!strncmp("middot", token, 6)) text +="·"; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; 
handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // 
pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! 
buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? 
(const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? //memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ";
215  else if (!strncmp("middot", token, 6)) text +="·; else if (!strncmp("cedil", token, 5)) text += "¸"; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent 
transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, 
module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; 
buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? 
//memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ";
216  else if (!strncmp("cedil", token, 5)) text += "¸; else if (!strncmp("ordm", token, 4)) text += "º"; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS 
instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } 
// end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), 
morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? 
//memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ";
217  else if (!strncmp("ordm", token, 4)) text += "º; else if (!strncmp("times", token, 5)) text += "×"; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // 
of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for 
(c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), 
buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? 
//memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ";
218  else if (!strncmp("times", token, 5)) text += "×; else if (!strncmp("divide", token, 6)) text +="÷"; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), 
"Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; 
//pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing 
<w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? 
//memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ";
219  else if (!strncmp("divide", token, 6)) text +="÷; else if (!strncmp("oslash", token, 6)) text +="ø"; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume 
transchange for Biblical texts // if (!stricmp(token, "i")) { // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" 
/>"); handled = true; } // Strongs numbers else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; 
buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? 
//memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ";
220  else if (!strncmp("oslash", token, 6)) text +="ø; continue; } // handle silly <variant word> items in greek whnu, remove when module is fixed if ((*from == '>') && (*(from-1) < 0)) { text += "&gt;"; continue; } if (*from == '>') { // process tokens intoken = false; keepToken = false; suspendTextPassThru = false; newWord = true; handled = false; while (wordStart < (text.c_str() + text.length())) { //hack if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1]) wordStart++; else break; } while (wordEnd > wordStart) { if (strchr(" ,;:.?!()'\"", *wordEnd)) wordEnd--; else break; } // variants if (!strncmp(token, "div type=\"variant\"", 18)) { XMLTag tag = token; text.append("<seg type=\"x-variant\""); SWBuf cls = "x-class:"; cls += tag.getAttribute("class"); if (cls.length()>8) text.appendFormatted(" subType=\"%s\"", cls.c_str()); text += ">"; divEnd = "</seg>"; newText = true; lastspace = false; handled = true; } // section titles if (!strcmp(token, "div class=\"sechead\"")) { // pushString(&to, "<title>"); text.append("<title>"); divEnd = "</title>"; newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/div")) { //pushString(&to, divEnd.c_str()); text.append(divEnd); lastspace = false; handled = true; } // Scripture Reference if (!strncmp(token, "scripRef", 8)) { // pushString(buf, "<reference osisRef=\""); suspendTextPassThru = true; newText = true; handled = true; } else if (!strncmp(token, "/scripRef", 9)) { SWBuf tmp; tmp = ""; tmp.append(textStart, (int)(textEnd - textStart)+1); //pushString(&to, convertToOSIS(tmp.c_str(), key)); text.append(VerseKey::convertToOSIS(tmp.c_str(), key)); suspendTextPassThru = false; handled = true; } // Usage of italics to represent transChange isn't domaninant; // solution: mark in OSIS instead, assume no semantics other than emphasis // of italicized text // if (!strcmp(module->Type(), "Biblical Texts")) { // // Italics assume transchange for Biblical texts // if (!stricmp(token, "i")) 
{ // pushString(&to, "<transChange type=\"added\">"); // newText = true; // lastspace = false; // handled = true; // } // else if (!stricmp(token, "/i")) { // pushString(&to, "</transChange>"); // lastspace = false; // handled = true; // } // } // else { // // otherwise, italics are just italics //-- end italics for transchange if (!stricmp(token, "i")) { // pushString(&to, "<hi type=\"i\">"); text.append("<hi type=\"i\">"); newText = true; lastspace = false; handled = true; } else if (!stricmp(token, "/i")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // } if (!strcmp(token, "b")) { // pushString(&to, "<hi type=\"b\">"); text.append("<hi type=\"b\">"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/b")) { // pushString(&to, "</hi>"); text.append("</hi>"); lastspace = false; handled = true; } // Footnote if (!strncmp(token, "note", 4)) { //pushString(&to, "<note>"); text.append("<note>"); newText = true; lastspace = false; handled = true; } else if (!strcmp(token, "/note")) { // pushString(&to, "</note>"); text.append("</note>"); lastspace = false; handled = true; } // Figure else if (!strncmp(token, "img ", 4)) { const char *src = strstr(token, "src"); if (!src) // assert we have a src attribute continue; // return false; //pushString(&to, "<figure src=\""); text.append("<figure src=\""); const char* end = strchr(src+2, '"'); //start search behind src=" if (end) { //append the path text.append(src+2, end - (src+2)); } // const char *c; // for (c = src;((*c) && (*c != '"')); c++); // uncomment for SWORD absolute path logic // if (*(c+1) == '/') { // pushString(buf, "file:"); // pushString(buf, module->getConfigEntry("AbsoluteDataPath")); // if (*((*buf)-1) == '/') // c++; // skip '/' // } // end of uncomment for asolute path logic // for (c++;((*c) && (*c != '"')); c++) // *to++ = *c; //pushString(&to, "\" />"); text.append("\" />"); handled = true; } // Strongs numbers else if 
(!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs valto = val; for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) *valto++ = token[i]; *valto = 0; if (atoi((!isdigit(*val))?val+1:val) < 5627) { // normal strongs number strstrip(val); if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "lemma"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("strong:%s ", val); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val); } text.insert(attStart - text.c_str(), buf); } else { //wordStart doesn't point to an existing <w> attribute! buf = ""; buf.appendFormatted("<w lemma=\"strong:%s\">", val); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } } // OLB verb morph, leave it out of OSIS tag else { } handled = true; } // Morphology else if (!strncmp(token, "sync type=\"morph\"", 17)) { SWBuf cls = ""; SWBuf morph = ""; for (ch = token+17; *ch; ch++) { if (!strncmp(ch, "class=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); cls = val; } if (!strncmp(ch, "value=\"", 7)) { valto = val; for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) *valto++ = ch[i]; *valto = 0; strstrip(val); morph = val; } } if (!strncmp(wordStart, "<w ", 3)) { const char *attStart = strstr(wordStart, "morph"); if (attStart) { //existing morph attribute, append this one to it attStart += 7; buf = ""; buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } else { // no lemma attribute attStart = wordStart + 3; buf = ""; buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str()); } text.insert(attStart - text.c_str(), buf); //hack, we have to } else { //no existing <w> attribute fond buf = ""; buf.appendFormatted("<w morph=\"%s:%s\">", 
((cls.length())?cls.c_str():"robinson"), morph.c_str()); text.insert(wordStart - text.c_str(), buf); text += "</w>"; lastspace = false; } handled = true; } if (!keepToken) { if (!handled) { SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>"); // exit(-1); } if (from[1] && strchr(" ,;.:?!()'\"", from[1])) { if (lastspace) { text--; } } if (newText) { textStart = from+1; newText = false; } continue; } // if not a strongs token, keep token in text text.appendFormatted("<%s>", token); if (newText) { textStart = text.c_str() + text.length(); newWord = false; } continue; } if (intoken) { if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) { token[tokpos++] = *from; token[tokpos+2] = 0; } } else { switch (*from) { case '\'': case '\"': case '`': // quoteStack.handleQuote(fromStart, from, &to); text += *from; //from++; //this line removes chars after an apostrophe! Needs fixing. break; default: if (newWord && (*from != ' ')) { wordStart = text.c_str() + text.length(); newWord = false; //fix this if required? 
//memset(to, 0, 10); } if (!suspendTextPassThru) { text += (*from); lastspace = (*from == ' '); } } } } const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key); if (vkey) { SWBuf ref = ""; if (vkey->getVerse()) { ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); } if (ref.length() > 0) { text = ref + text; if (vkey->getVerse()) { VerseKey *tmp = (VerseKey *)vkey->clone(); *tmp = *vkey; tmp->setAutoNormalize(false); tmp->setIntros(true); text += "</verse>"; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); /* if (!quoteStack.empty()) { SWLog::getSystemLog()->logError("popping unclosed quote at end of book"); quoteStack.clear(); } */ } } delete tmp; } // else if (vkey->getChapter()) { // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef()); // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); } } return 0; } SWORD_NAMESPACE_END ";
221  continue;
222  }
223 
224  // handle silly <variant word> items in greek whnu, remove when module is fixed
225  if ((*from == '>') && (*(from-1) < 0)) {
226  text += "&gt;";
227  continue;
228  }
229 
230  if (*from == '>') { // process tokens
231  intoken = false;
232  keepToken = false;
233  suspendTextPassThru = false;
234  newWord = true;
235  handled = false;
236 
237  while (wordStart < (text.c_str() + text.length())) { //hack
238  if (strchr(";,. :?!()'\"", *wordStart) && wordStart[0] && wordStart[1])
239  wordStart++;
240  else break;
241  }
242  while (wordEnd > wordStart) {
243  if (strchr(" ,;:.?!()'\"", *wordEnd))
244  wordEnd--;
245  else break;
246  }
247 
248  // variants
249  if (!strncmp(token, "div type=\"variant\"", 18)) {
250  XMLTag tag = token;
251  text.append("<seg type=\"x-variant\"");
252  SWBuf cls = "x-class:";
253  cls += tag.getAttribute("class");
254  if (cls.length()>8)
255  text.appendFormatted(" subType=\"%s\"", cls.c_str());
256 
257  text += ">";
258  divEnd = "</seg>";
259  newText = true;
260  lastspace = false;
261  handled = true;
262  }
263  // section titles
264  if (!strcmp(token, "div class=\"sechead\"")) {
265 // pushString(&to, "<title>");
266  text.append("<title>");
267  divEnd = "</title>";
268  newText = true;
269  lastspace = false;
270  handled = true;
271  }
272  else if (!strcmp(token, "/div")) {
273  //pushString(&to, divEnd.c_str());
274  text.append(divEnd);
275  lastspace = false;
276  handled = true;
277  }
278  // Scripture Reference
279  if (!strncmp(token, "scripRef", 8)) {
280  // pushString(buf, "<reference osisRef=\"");
281  suspendTextPassThru = true;
282  newText = true;
283  handled = true;
284  }
285  else if (!strncmp(token, "/scripRef", 9)) {
286  SWBuf tmp;
287  tmp = "";
288  tmp.append(textStart, (int)(textEnd - textStart)+1);
289  //pushString(&to, convertToOSIS(tmp.c_str(), key));
290  text.append(VerseKey::convertToOSIS(tmp.c_str(), key));
291  suspendTextPassThru = false;
292  handled = true;
293  }
294 // Usage of italics to represent transChange isn't dominant;
295 // solution: mark in OSIS instead, assume no semantics other than emphasis
296 // of italicized text
297 // if (!strcmp(module->Type(), "Biblical Texts")) {
298 // // Italics assume transchange for Biblical texts
299 // if (!stricmp(token, "i")) {
300 // pushString(&to, "<transChange type=\"added\">");
301 // newText = true;
302 // lastspace = false;
303 // handled = true;
304 // }
305 // else if (!stricmp(token, "/i")) {
306 // pushString(&to, "</transChange>");
307 // lastspace = false;
308 // handled = true;
309 // }
310 // }
311 // else {
312 // // otherwise, italics are just italics
313 //-- end italics for transchange
314  if (!stricmp(token, "i")) {
315 // pushString(&to, "<hi type=\"i\">");
316  text.append("<hi type=\"i\">");
317  newText = true;
318  lastspace = false;
319  handled = true;
320  }
321  else if (!stricmp(token, "/i")) {
322 // pushString(&to, "</hi>");
323  text.append("</hi>");
324  lastspace = false;
325  handled = true;
326  }
327 // }
328 
329  if (!strcmp(token, "b")) {
330 // pushString(&to, "<hi type=\"b\">");
331  text.append("<hi type=\"b\">");
332  newText = true;
333  lastspace = false;
334  handled = true;
335  }
336  else if (!strcmp(token, "/b")) {
337 // pushString(&to, "</hi>");
338  text.append("</hi>");
339  lastspace = false;
340  handled = true;
341  }
342 
343  // Footnote
344  if (!strncmp(token, "note", 4)) {
345  //pushString(&to, "<note>");
346  text.append("<note>");
347  newText = true;
348  lastspace = false;
349  handled = true;
350  }
351  else if (!strcmp(token, "/note")) {
352  // pushString(&to, "</note>");
353  text.append("</note>");
354  lastspace = false;
355  handled = true;
356  }
357 
358  // Figure
359  else if (!strncmp(token, "img ", 4)) {
360  const char *src = strstr(token, "src");
361  if (!src) // assert we have a src attribute
362  continue;
363 // return false;
364 
365  //pushString(&to, "<figure src=\"");
366  text.append("<figure src=\"");
367 
368  const char* end = strchr(src+2, '"'); //start search behind src="
369 
370  if (end) { //append the path
371  text.append(src+2, end - (src+2));
372  }
373 
374 // const char *c;
375 // for (c = src;((*c) && (*c != '"')); c++);
376 
377 // uncomment for SWORD absolute path logic
378 // if (*(c+1) == '/') {
379 // pushString(buf, "file:");
380 // pushString(buf, module->getConfigEntry("AbsoluteDataPath"));
381 // if (*((*buf)-1) == '/')
382 // c++; // skip '/'
383 // }
384 // end of uncomment for absolute path logic
385 
386 // for (c++;((*c) && (*c != '"')); c++)
387 // *to++ = *c;
388 
389  //pushString(&to, "\" />");
390  text.append("\" />");
391  handled = true;
392  }
393 
394  // Strongs numbers
395  else if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs
396  valto = val;
397  for (unsigned int i = 27; token[i] != '\"' && i < 150; i++)
398  *valto++ = token[i];
399  *valto = 0;
400  if (atoi((!isdigit(*val))?val+1:val) < 5627) {
401  // normal strongs number
402  strstrip(val);
403 
404  if (!strncmp(wordStart, "<w ", 3)) {
405  const char *attStart = strstr(wordStart, "lemma");
406  if (attStart) { //existing lemma attribute, append this one to it
407  attStart += 7;
408  buf = "";
409  buf.appendFormatted("strong:%s ", val);
410  }
411  else { // no lemma attribute
412  attStart = wordStart + 3;
413  buf = "";
414  buf.appendFormatted(buf, "lemma=\"strong:%s\" ", val);
415  }
416 
417  text.insert(attStart - text.c_str(), buf);
418  }
419  else { //wordStart doesn't point to an existing <w> attribute!
420  buf = "";
421  buf.appendFormatted("<w lemma=\"strong:%s\">", val);
422  text.insert(wordStart - text.c_str(), buf);
423  text += "</w>";
424  lastspace = false;
425  }
426  }
427  // OLB verb morph, leave it out of OSIS tag
428  else {
429  }
430  handled = true;
431  }
432 
433  // Morphology
434  else if (!strncmp(token, "sync type=\"morph\"", 17)) {
435  SWBuf cls = "";
436  SWBuf morph = "";
437  for (ch = token+17; *ch; ch++) {
438  if (!strncmp(ch, "class=\"", 7)) {
439  valto = val;
440  for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++)
441  *valto++ = ch[i];
442  *valto = 0;
443  strstrip(val);
444  cls = val;
445  }
446  if (!strncmp(ch, "value=\"", 7)) {
447  valto = val;
448  for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++)
449  *valto++ = ch[i];
450  *valto = 0;
451  strstrip(val);
452  morph = val;
453  }
454  }
455  if (!strncmp(wordStart, "<w ", 3)) {
456  const char *attStart = strstr(wordStart, "morph");
457  if (attStart) { //existing morph attribute, append this one to it
458  attStart += 7;
459  buf = "";
460  buf.appendFormatted("%s:%s ", ((cls.length())?cls.c_str():"robinson"), morph.c_str());
461  }
462  else { // no lemma attribute
463  attStart = wordStart + 3;
464  buf = "";
465  buf.appendFormatted("morph=\"%s:%s\" ", ((cls.length())?cls.c_str():"robinson"), morph.c_str());
466  }
467 
468  text.insert(attStart - text.c_str(), buf); //hack, we have to
469  }
470  else { //no existing <w> attribute fond
471  buf = "";
472  buf.appendFormatted("<w morph=\"%s:%s\">", ((cls.length())?cls.c_str():"robinson"), morph.c_str());
473  text.insert(wordStart - text.c_str(), buf);
474  text += "</w>";
475  lastspace = false;
476 
477  }
478  handled = true;
479  }
480 
481  if (!keepToken) {
482  if (!handled) {
483  SWLog::getSystemLog()->logError("Unprocessed Token: <%s> in key %s", token, key ? (const char*)*key : "<unknown>");
484 // exit(-1);
485  }
486  if (from[1] && strchr(" ,;.:?!()'\"", from[1])) {
487  if (lastspace) {
488  text--;
489  }
490  }
491  if (newText) {
492  textStart = from+1;
493  newText = false;
494  }
495  continue;
496  }
497 
498  // if not a strongs token, keep token in text
499  text.appendFormatted("<%s>", token);
500 
501  if (newText) {
502  textStart = text.c_str() + text.length();
503  newWord = false;
504  }
505  continue;
506  }
507  if (intoken) {
508  if ((tokpos < 2045) && ((*from != 10)&&(*from != 13))) {
509  token[tokpos++] = *from;
510  token[tokpos+2] = 0;
511  }
512  }
513  else {
514  switch (*from) {
515  case '\'':
516  case '\"':
517  case '`':
518 // quoteStack.handleQuote(fromStart, from, &to);
519  text += *from;
520  //from++; //this line removes chars after an apostrophe! Needs fixing.
521  break;
522  default:
523  if (newWord && (*from != ' ')) {
524  wordStart = text.c_str() + text.length();
525  newWord = false;
526 
527  //fix this if required?
528  //memset(to, 0, 10);
529 
530  }
531 
532  if (!suspendTextPassThru) {
533  text += (*from);
534  lastspace = (*from == ' ');
535  }
536  }
537  }
538  }
539 
540  const VerseKey *vkey = SWDYNAMIC_CAST(const VerseKey, key);
541  if (vkey) {
542  SWBuf ref = "";
543  if (vkey->getVerse()) {
544  ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef());
545  }
546 
547  if (ref.length() > 0) {
548 
549  text = ref + text;
550 
551  if (vkey->getVerse()) {
552  VerseKey *tmp = (VerseKey *)vkey->clone();
553  *tmp = *vkey;
554  tmp->setAutoNormalize(false);
555  tmp->setIntros(true);
556 
557  text += "</verse>";
558 
559  *tmp = MAXVERSE;
560  if (*vkey == *tmp) {
561  tmp->setVerse(0);
562 // sprintf(ref, "\t</div>");
563 // pushString(&to, ref);
564  *tmp = MAXCHAPTER;
565  *tmp = MAXVERSE;
566  if (*vkey == *tmp) {
567  tmp->setChapter(0);
568  tmp->setVerse(0);
569 // sprintf(ref, "\t</div>");
570 // pushString(&to, ref);
571 /*
572  if (!quoteStack.empty()) {
573  SWLog::getSystemLog()->logError("popping unclosed quote at end of book");
574  quoteStack.clear();
575  }
576 */
577  }
578  }
579  delete tmp;
580  }
581 // else if (vkey->getChapter()) {
582 // sprintf(ref, "\t<div type=\"chapter\" osisID=\"%s\">", vkey->getOSISRef());
583 // }
584 // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef());
585  }
586  }
587  return 0;
588 }
589 
590 
#define SWORD_NAMESPACE_START
Definition: defs.h:39
SWBuf & appendFormatted(const char *format,...)
Definition: swbuf.cpp:81
virtual SWKey * clone() const
Definition: versekey.cpp:278
Definition: swbuf.h:47
unsigned long length() const
Definition: swbuf.h:197
#define MAXVERSE
Definition: versekey.h:43
static SWLog * getSystemLog()
Definition: swlog.cpp:53
virtual void setChapter(int ichapter)
Definition: versekey.cpp:1603
SWText * module
Definition: osis2mod.cpp:105
Definition: utilxml.h:38
int stricmp(const char *s1, const char *s2)
Definition: utilstr.cpp:194
static const char * convertToOSIS(const char *inRef, const SWKey *defaultKey)
Definition: versekey.cpp:1887
void insert(unsigned long pos, const char *str, unsigned long start=0, signed long max=-1)
Definition: swbuf.cpp:99
virtual ~ThMLOSIS()
Definition: thmlosis.cpp:41
virtual void setIntros(bool val)
Definition: versekey.cpp:1663
return NULL
Definition: regex.c:7953
const char * c_str() const
Definition: swbuf.h:158
SWBuf & append(const char *str, long max=-1)
Definition: swbuf.h:274
virtual void setVerse(int iverse)
Definition: versekey.cpp:1622
virtual int getVerse() const
Definition: versekey.cpp:1534
virtual const char * getOSISRef() const
Definition: versekey.cpp:1810
ThMLOSIS()
Definition: thmlosis.cpp:37
#define SWDYNAMIC_CAST(className, object)
Definition: defs.h:47
const char * getAttribute(const char *attribName, int partNum=-1, char partSplit= '|') const
Definition: utilxml.cpp:230
virtual char processText(SWBuf &text, const SWKey *key=0, const SWModule *module=0)
Definition: thmlosis.cpp:45
char * strstrip(char *istr)
Definition: utilstr.cpp:118
void logError(const char *fmt,...) const
Definition: swlog.cpp:87
int strnicmp(const char *s1, const char *s2, int len)
Definition: utilstr.cpp:180
#define SWORD_NAMESPACE_END
Definition: defs.h:40
Definition: swkey.h:77
#define MAXCHAPTER
Definition: versekey.h:44
virtual void setAutoNormalize(bool iautonorm)
Definition: versekey.cpp:1648