[sword-svn] r1942 - in trunk: include lib/bcppmake src/mgr src/modules/filters
chrislit at crosswire.org
chrislit at crosswire.org
Wed Jul 5 21:35:10 MST 2006
Author: chrislit
Date: 2006-07-05 21:34:59 -0700 (Wed, 05 Jul 2006)
New Revision: 1942
Added:
trunk/include/teiplain.h
trunk/include/teirtf.h
trunk/src/modules/filters/teiplain.cpp
trunk/src/modules/filters/teirtf.cpp
Modified:
trunk/include/defs.h
trunk/include/markupfiltmgr.h
trunk/include/swmgr.h
trunk/lib/bcppmake/libsword.bpf
trunk/lib/bcppmake/libsword.bpr
trunk/src/mgr/markupfiltmgr.cpp
trunk/src/mgr/swmgr.cpp
Log:
Initial TEI support (mostly for lexicons)
Modified: trunk/include/defs.h
===================================================================
--- trunk/include/defs.h 2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/include/defs.h 2006-07-06 04:34:59 UTC (rev 1942)
@@ -102,7 +102,7 @@
#endif
enum {DIRECTION_LTR = 0, DIRECTION_RTL, DIRECTION_BIDI};
-enum {FMT_UNKNOWN = 0, FMT_PLAIN, FMT_THML, FMT_GBF, FMT_HTML, FMT_HTMLHREF, FMT_RTF, FMT_OSIS, FMT_WEBIF};
+enum {FMT_UNKNOWN = 0, FMT_PLAIN, FMT_THML, FMT_GBF, FMT_HTML, FMT_HTMLHREF, FMT_RTF, FMT_OSIS, FMT_WEBIF, FMT_TEI};
enum {ENC_UNKNOWN = 0, ENC_LATIN1, ENC_UTF8, ENC_SCSU, ENC_UTF16, ENC_RTF, ENC_HTML};
SWORD_NAMESPACE_END
Modified: trunk/include/markupfiltmgr.h
===================================================================
--- trunk/include/markupfiltmgr.h 2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/include/markupfiltmgr.h 2006-07-06 04:34:59 UTC (rev 1942)
@@ -36,6 +36,7 @@
SWFilter* fromgbf;
SWFilter* fromplain;
SWFilter* fromosis;
+ SWFilter* fromtei;
/**
* current markup value
Modified: trunk/include/swmgr.h
===================================================================
--- trunk/include/swmgr.h 2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/include/swmgr.h 2006-07-06 04:34:59 UTC (rev 1942)
@@ -106,6 +106,7 @@
SWFilter *gbfplain;
SWFilter *thmlplain;
SWFilter *osisplain;
+ SWFilter *teiplain;
SWOptionFilter *transliterator;
FilterList cleanupFilters;
StringList options;
Added: trunk/include/teiplain.h
===================================================================
--- trunk/include/teiplain.h 2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/include/teiplain.h 2006-07-06 04:34:59 UTC (rev 1942)
@@ -0,0 +1,49 @@
+/******************************************************************************
+ *
+ * $Id:
+ *
+ * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#ifndef TEIPLAIN_H
+#define TEIPLAIN_H
+
+#include <swbasicfilter.h>
+#include <utilxml.h>
+
+SWORD_NAMESPACE_START
+
+/** this filter converts TEI text to plain text
+ *
+ * Tokens are processed in handleToken(). The implementation added with this
+ * header (teiplain.cpp) gives special treatment to the <p>, <entryFree>,
+ * <sense>, <div> and <etym> tags and reports every other tag as unhandled.
+ */
+class SWDLLEXPORT TEIPlain : public SWBasicFilter {
+public:
+protected:
+ class MyUserData : public BasicFilterUserData { // filter-specific user data, created by createUserData()
+ public:
+ SWBuf w; // NOTE(review): not referenced by teiplain.cpp in this revision -- confirm it is needed
+ XMLTag tag; // NOTE(review): likewise not referenced by the visible implementation
+ MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {} // forwards both arguments to the base class
+ };
+ virtual BasicFilterUserData *createUserData(const SWModule *module, const SWKey *key) { // returns a newly allocated MyUserData; who deletes it is not visible here
+ return new MyUserData(module, key);
+ }
+ virtual bool handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData); // returns false when the token was not handled
+public:
+ TEIPlain(); // sets the token/escape delimiters and registers the entity substitutions
+};
+
+SWORD_NAMESPACE_END
+#endif
Added: trunk/include/teirtf.h
===================================================================
--- trunk/include/teirtf.h 2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/include/teirtf.h 2006-07-06 04:34:59 UTC (rev 1942)
@@ -0,0 +1,50 @@
+/******************************************************************************
+ *
+ * $Id:
+ *
+ * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#ifndef TEIRTF_H
+#define TEIRTF_H
+
+#include <swbasicfilter.h>
+
+SWORD_NAMESPACE_START
+
+/** this filter converts TEI text to RTF text
+ *
+ * Tokens are processed in handleToken(); the implementation added with this
+ * header (teirtf.cpp) emits RTF control words for the tags it recognises and
+ * reports every other tag as unhandled.
+ */
+class SWDLLEXPORT TEIRTF : public SWBasicFilter {
+private:
+
+protected:
+ class MyUserData : public BasicFilterUserData { // filter-specific user data, created by createUserData()
+ public:
+ bool BiblicalText; // true when the module's Type() equals "Biblical Texts"; false when no module is given
+ SWBuf w; // NOTE(review): not referenced by teirtf.cpp in this revision -- confirm it is needed
+ SWBuf version; // set to the module's Name() when a module is given
+ MyUserData(const SWModule *module, const SWKey *key); // defined in teirtf.cpp
+ };
+ virtual BasicFilterUserData *createUserData(const SWModule *module, const SWKey *key) { // returns a newly allocated MyUserData; who deletes it is not visible here
+ return new MyUserData(module, key);
+ }
+ virtual bool handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData); // returns false when the token was not handled
+public:
+ TEIRTF(); // sets the token/escape delimiters and registers the entity substitutions
+};
+
+SWORD_NAMESPACE_END
+#endif
Modified: trunk/lib/bcppmake/libsword.bpf
===================================================================
--- trunk/lib/bcppmake/libsword.bpf 2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/lib/bcppmake/libsword.bpf 2006-07-06 04:34:59 UTC (rev 1942)
@@ -144,6 +144,8 @@
USEUNIT("..\..\src\utilfuns\ftpparse.c");
USEUNIT("..\..\src\modules\filters\osismorphsegmentation.cpp");
USEUNIT("..\..\src\keys\versetreekey.cpp");
+USEUNIT("..\..\src\modules\filters\teiplain.cpp");
+USEUNIT("..\..\src\modules\filters\teirtf.cpp");
//---------------------------------------------------------------------------
#define Library
Modified: trunk/lib/bcppmake/libsword.bpr
===================================================================
--- trunk/lib/bcppmake/libsword.bpr 2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/lib/bcppmake/libsword.bpr 2006-07-06 04:34:59 UTC (rev 1942)
@@ -114,7 +114,8 @@
..\..\src\utilfuns\zlib\adler32.obj
..\..\src\modules\filters\papyriplain.obj ..\..\src\utilfuns\ftpparse.obj
..\..\src\modules\filters\osismorphsegmentation.obj
- ..\..\src\keys\versetreekey.obj"/>
+ ..\..\src\keys\versetreekey.obj ..\..\src\modules\filters\teiplain.obj
+ ..\..\src\modules\filters\teirtf.obj"/>
<RESFILES value=""/>
<IDLFILES value=""/>
<IDLGENFILES value=""/>
Modified: trunk/src/mgr/markupfiltmgr.cpp
===================================================================
--- trunk/src/mgr/markupfiltmgr.cpp 2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/src/mgr/markupfiltmgr.cpp 2006-07-06 04:34:59 UTC (rev 1942)
@@ -21,6 +21,8 @@
#include <thmlplain.h>
#include <gbfplain.h>
+#include <osisplain.h>
+#include <teiplain.h>
#include <thmlgbf.h>
#include <gbfthml.h>
#include <thmlhtml.h>
@@ -33,6 +35,7 @@
#include <gbfosis.h>
#include <thmlosis.h>
#include <osisrtf.h>
+#include <teirtf.h>
#include <osisosis.h>
#include <osishtmlhref.h>
#include <gbfwebif.h>
@@ -76,6 +79,8 @@
delete (fromplain);
if (fromosis)
delete (fromosis);
+ if (fromtei)
+ delete (fromtei);
}
/******************************************************************************
@@ -94,6 +99,7 @@
SWFilter * oldthml = fromthml;
SWFilter * oldgbf = fromgbf;
SWFilter * oldosis = fromosis;
+ SWFilter * oldtei = fromtei;
CreateFilters(markup);
@@ -159,6 +165,21 @@
}
break;
}
+ case FMT_TEI:
+ if (oldtei != fromtei) {
+ if (oldtei) {
+ if (!fromtei) {
+ module->second->RemoveRenderFilter(oldtei);
+ }
+ else {
+ module->second->ReplaceRenderFilter(oldtei, fromtei);
+ }
+ }
+ else if (fromtei) {
+ module->second->AddRenderFilter(fromtei);
+ }
+ break;
+ }
}
if (oldthml)
@@ -169,6 +190,8 @@
delete oldplain;
if (oldosis)
delete oldosis;
+ if (oldtei)
+ delete oldtei;
}
return markup;
}
@@ -191,6 +214,10 @@
if (fromosis)
module->AddRenderFilter(fromosis);
break;
+ case FMT_TEI:
+ if (fromtei)
+ module->AddRenderFilter(fromtei);
+ break;
}
}
@@ -201,50 +228,65 @@
fromplain = NULL;
fromthml = new ThMLPlain();
fromgbf = new GBFPlain();
- fromosis = NULL;
+ fromosis = new OSISPlain();
+ fromtei = new TEIPlain(); // fromtei is an SWFilter*: heap-allocate like the sibling filters (it is later released with delete)
break;
case FMT_THML:
fromplain = NULL;
fromthml = NULL;
fromgbf = new GBFThML();
fromosis = NULL;
+ fromtei = NULL;
break;
case FMT_GBF:
fromplain = NULL;
fromthml = new ThMLGBF();
fromgbf = NULL;
fromosis = NULL;
+ fromtei = NULL;
break;
case FMT_HTML:
fromplain = new PLAINHTML();
fromthml = new ThMLHTML();
fromgbf = new GBFHTML();
fromosis = NULL;
+ fromtei = NULL;
break;
case FMT_HTMLHREF:
fromplain = new PLAINHTML();
fromthml = new ThMLHTMLHREF();
fromgbf = new GBFHTMLHREF();
fromosis = new OSISHTMLHREF();
+ fromtei = NULL;
break;
case FMT_RTF:
fromplain = NULL;
fromthml = new ThMLRTF();
fromgbf = new GBFRTF();
fromosis = new OSISRTF();
+ fromtei = new TEIRTF();
break;
case FMT_OSIS:
fromplain = NULL;
fromthml = new ThMLOSIS();
fromgbf = new GBFOSIS();
fromosis = new OSISOSIS();
+ fromtei = NULL;
break;
case FMT_WEBIF:
fromplain = NULL;
fromthml = new ThMLWEBIF();
fromgbf = new GBFWEBIF();
fromosis = new OSISWEBIF();
+ fromtei = NULL;
break;
+ case FMT_TEI:
+ fromplain = NULL;
+ fromthml = NULL;
+ fromgbf = NULL;
+ fromosis = NULL;
+ fromtei = NULL;
+ break;
}
}
Modified: trunk/src/mgr/swmgr.cpp
===================================================================
--- trunk/src/mgr/swmgr.cpp 2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/src/mgr/swmgr.cpp 2006-07-06 04:34:59 UTC (rev 1942)
@@ -42,6 +42,7 @@
#include <gbfplain.h>
#include <thmlplain.h>
#include <osisplain.h>
+#include <teiplain.h>
#include <papyriplain.h>
#include <gbfstrongs.h>
#include <gbffootnotes.h>
@@ -242,6 +243,9 @@
osisplain = new OSISPlain();
cleanupFilters.push_back(osisplain);
+
+ teiplain = new TEIPlain();
+ cleanupFilters.push_back(teiplain);
//#endif
}
@@ -713,6 +717,8 @@
markup = FMT_THML;
else if (!stricmp(sourceformat.c_str(), "OSIS"))
markup = FMT_OSIS;
+ else if (!stricmp(sourceformat.c_str(), "TEI"))
+ markup = FMT_TEI;
else
markup = FMT_GBF;
@@ -986,6 +992,9 @@
else if (!stricmp(sourceformat.c_str(), "OSIS")) {
module->AddStripFilter(osisplain);
}
+ else if (!stricmp(sourceformat.c_str(), "TEI")) {
+ module->AddStripFilter(teiplain);
+ }
if (filterMgr)
filterMgr->AddStripFilters(module, section);
Added: trunk/src/modules/filters/teiplain.cpp
===================================================================
--- trunk/src/modules/filters/teiplain.cpp 2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/src/modules/filters/teiplain.cpp 2006-07-06 04:34:59 UTC (rev 1942)
@@ -0,0 +1,116 @@
+/***************************************************************************
+ teiplain.cpp - TEI to Plaintext filter
+ -------------------
+ begin : 2006-07-05
+ copyright : 2006 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <teiplain.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+TEIPlain::TEIPlain() {
+	// tokens are delimited by '<' ... '>', escapes by '&' ... ';'
+	setTokenStart("<");
+	setTokenEnd(">");
+	setEscapeStart("&");
+	setEscapeEnd(";");
+
+	// case sensitivity is switched on before the substitutes are registered
+	setEscapeStringCaseSensitive(true);
+
+	// escape name -> replacement text, registered in this order
+	static const char *const entities[][2] = {
+		{"amp",  "&"},
+		{"apos", "'"},
+		{"lt",   "<"},
+		{"gt",   ">"},
+		{"quot", "\""}
+	};
+	const unsigned int entityCount = sizeof(entities) / sizeof(entities[0]);
+	for (unsigned int i = 0; i < entityCount; ++i) {
+		addEscapeStringSubstitute(entities[i][0], entities[i][1]);
+	}
+
+	setTokenCaseSensitive(true);
+}
+
+
+/**
+ * Renders a single TEI tag as plain text.
+ *
+ * @param buf      output buffer the rendered text is appended to
+ * @param token    tag text handed over by the filter framework
+ * @param userData per-call state; only supressAdjacentWhitespace is written
+ * @return true when the token was consumed (by a registered substitution or
+ *         by one of the tags below), false when it was not handled
+ *
+ * Handled tags: <p>, <entryFree>, <sense>, <div>, <etym>.
+ */
+bool TEIPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	// registered simple substitutions take precedence over manual processing
+	if (substituteToken(buf, token)) {
+		return true;
+	}
+
+	XMLTag tag(token);
+
+	// <p> paragraph tag
+	if (!strcmp(tag.getName(), "p")) {
+		if (tag.isEndTag()) {			// end tag
+			buf += "\n";
+			userData->supressAdjacentWhitespace = true;
+		}
+		else if (tag.isEmpty()) {		// empty paragraph break marker
+			buf += "\n\n";
+			userData->supressAdjacentWhitespace = true;
+		}
+		else {					// non-empty start tag
+			buf += "\n";
+		}
+	}
+
+	// <entryFree>: a start tag prints its "n" attribute as a label
+	else if (!strcmp(tag.getName(), "entryFree")) {
+		if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+			SWBuf n = tag.getAttribute("n");
+			if (n != "") {
+				buf += n;
+				buf += ". ";
+			}
+		}
+	}
+
+	// <sense>: a start tag prints its "n" label, an end tag breaks the line
+	else if (!strcmp(tag.getName(), "sense")) {
+		if (tag.isEndTag()) {
+			buf += "\n";
+		}
+		else if (!tag.isEmpty()) {
+			SWBuf n = tag.getAttribute("n");
+			if (n != "") {
+				buf += n;
+				buf += ". ";
+			}
+		}
+	}
+
+	// <div>: only the start tag produces output; </div> is consumed silently
+	else if (!strcmp(tag.getName(), "div")) {
+		if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+			buf.append("\n\n\n");
+		}
+	}
+
+	// <etym>: content is wrapped in square brackets
+	else if (!strcmp(tag.getName(), "etym")) {
+		if (tag.isEndTag()) {
+			buf += "]";
+		}
+		else if (!tag.isEmpty()) {
+			buf += "[";
+		}
+	}
+
+	else {
+		return false;	// we still didn't handle token
+	}
+
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
Added: trunk/src/modules/filters/teirtf.cpp
===================================================================
--- trunk/src/modules/filters/teirtf.cpp 2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/src/modules/filters/teirtf.cpp 2006-07-06 04:34:59 UTC (rev 1942)
@@ -0,0 +1,207 @@
+/***************************************************************************
+ teirtf.cpp - TEI to RTF filter
+ -------------------
+ begin : 2006-07-03
+ copyright : 2006 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <teirtf.h>
+#include <utilxml.h>
+#include <swmodule.h>
+
+SWORD_NAMESPACE_START
+
+
+// Captures the module's name and whether its type is "Biblical Texts".
+// Without a module, BiblicalText stays false and version is left untouched.
+TEIRTF::MyUserData::MyUserData(const SWModule *module, const SWKey *key)
+	: BasicFilterUserData(module, key), BiblicalText(false) {
+	if (!module) {
+		return;
+	}
+	version = module->Name();
+	BiblicalText = (strcmp(module->Type(), "Biblical Texts") == 0);
+}
+
+
+TEIRTF::TEIRTF() {
+	// tokens are delimited by '<' ... '>', escapes by '&' ... ';'
+	setTokenStart("<");
+	setTokenEnd(">");
+	setEscapeStart("&");
+	setEscapeEnd(";");
+
+	// case sensitivity is switched on before the substitutes are registered
+	setEscapeStringCaseSensitive(true);
+
+	// escape name -> replacement text, registered in this order
+	static const char *const entities[][2] = {
+		{"amp",  "&"},
+		{"apos", "'"},
+		{"lt",   "<"},
+		{"gt",   ">"},
+		{"quot", "\""}
+	};
+	const unsigned int entityCount = sizeof(entities) / sizeof(entities[0]);
+	for (unsigned int i = 0; i < entityCount; ++i) {
+		addEscapeStringSubstitute(entities[i][0], entities[i][1]);
+	}
+
+	setTokenCaseSensitive(true);
+}
+
+
+/**
+ * Renders a single TEI tag as RTF.
+ *
+ * @param buf      output buffer the RTF is appended to
+ * @param token    tag text handed over by the filter framework
+ * @param userData per-call state; only supressAdjacentWhitespace is written
+ * @return true when the token was consumed (by a registered substitution or
+ *         by one of the tags below), false when it was not handled
+ *
+ * Handled tags: <p>, <hi>, <entryFree>, <sense>, <div>, <etym>, and the
+ * italicised markers <pos>, <gen>, <case>, <tr>, <number>, <mood>.
+ */
+bool TEIRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	// registered simple substitutions take precedence over manual processing
+	if (substituteToken(buf, token)) {
+		return true;
+	}
+
+	XMLTag tag(token);
+
+	// <p> paragraph tag
+	if (!strcmp(tag.getName(), "p")) {
+		if (tag.isEndTag()) {			// end tag
+			buf += "{\\par}";
+			userData->supressAdjacentWhitespace = true;
+		}
+		else if (tag.isEmpty()) {		// empty paragraph break marker
+			buf += "{\\par\\par}";
+			userData->supressAdjacentWhitespace = true;
+		}
+		else {					// non-empty start tag
+			buf += "{\\par}";
+		}
+	}
+
+	// <hi>: highlighted text, style selected by the "rend" attribute
+	else if (!strcmp(tag.getName(), "hi")) {
+		if (tag.isEndTag()) {
+			buf += "}";
+		}
+		else if (!tag.isEmpty()) {
+			SWBuf rend = tag.getAttribute("rend");
+			if (rend == "ital")
+				buf += "{\\i1 ";
+			else if (rend == "bold")
+				buf += "{\\b1 ";
+			else if (rend == "sup")
+				buf += "{\\super ";
+			else
+				// unknown or missing rend: still open a plain group, because
+				// </hi> always emits "}" and the braces must stay balanced
+				buf += "{";
+		}
+	}
+
+	// <entryFree>: a start tag prints its "n" attribute as a bold label
+	else if (!strcmp(tag.getName(), "entryFree")) {
+		if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+			SWBuf n = tag.getAttribute("n");
+			if (n != "") {
+				buf += "{\\b1 ";
+				buf += n;
+				buf += ". }";
+			}
+		}
+	}
+
+	// <sense>: a start tag prints a bold "n" label on a new paragraph,
+	// an end tag closes the paragraph
+	else if (!strcmp(tag.getName(), "sense")) {
+		if (tag.isEndTag()) {
+			buf += "\\par ";
+		}
+		else if (!tag.isEmpty()) {
+			SWBuf n = tag.getAttribute("n");
+			if (n != "") {
+				buf += "{\\par\\b1 ";
+				buf += n;
+				buf += ". }";
+			}
+		}
+	}
+
+	// <div>: only the start tag produces output; </div> is consumed silently
+	else if (!strcmp(tag.getName(), "div")) {
+		if ((!tag.isEndTag()) && (!tag.isEmpty())) {
+			buf.append("\\par\\par\\pard ");
+		}
+	}
+
+	// <pos>, <gen>, <case>, <tr>, <number>, <mood>: all rendered in italics
+	else if (!strcmp(tag.getName(), "pos")
+	      || !strcmp(tag.getName(), "gen")
+	      || !strcmp(tag.getName(), "case")
+	      || !strcmp(tag.getName(), "tr")
+	      || !strcmp(tag.getName(), "number")
+	      || !strcmp(tag.getName(), "mood")) {
+		if (tag.isEndTag()) {
+			buf += "}";
+		}
+		else if (!tag.isEmpty()) {
+			buf += "{\\i1 ";
+		}
+	}
+
+	// <etym>: content is wrapped in square brackets
+	else if (!strcmp(tag.getName(), "etym")) {
+		if (tag.isEndTag()) {
+			buf += "]";
+		}
+		else if (!tag.isEmpty()) {
+			buf += "[";
+		}
+	}
+
+	else {
+		return false;	// we still didn't handle token
+	}
+
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
+
More information about the sword-cvs
mailing list