[sword-svn] r1942 - in trunk: include lib/bcppmake src/mgr src/modules/filters

chrislit at crosswire.org chrislit at crosswire.org
Wed Jul 5 21:35:10 MST 2006


Author: chrislit
Date: 2006-07-05 21:34:59 -0700 (Wed, 05 Jul 2006)
New Revision: 1942

Added:
   trunk/include/teiplain.h
   trunk/include/teirtf.h
   trunk/src/modules/filters/teiplain.cpp
   trunk/src/modules/filters/teirtf.cpp
Modified:
   trunk/include/defs.h
   trunk/include/markupfiltmgr.h
   trunk/include/swmgr.h
   trunk/lib/bcppmake/libsword.bpf
   trunk/lib/bcppmake/libsword.bpr
   trunk/src/mgr/markupfiltmgr.cpp
   trunk/src/mgr/swmgr.cpp
Log:
Initial TEI support (mostly for lexicons)


Modified: trunk/include/defs.h
===================================================================
--- trunk/include/defs.h	2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/include/defs.h	2006-07-06 04:34:59 UTC (rev 1942)
@@ -102,7 +102,7 @@
 #endif
 
 enum {DIRECTION_LTR = 0, DIRECTION_RTL, DIRECTION_BIDI};
-enum {FMT_UNKNOWN = 0, FMT_PLAIN, FMT_THML, FMT_GBF, FMT_HTML, FMT_HTMLHREF, FMT_RTF, FMT_OSIS, FMT_WEBIF};
+enum {FMT_UNKNOWN = 0, FMT_PLAIN, FMT_THML, FMT_GBF, FMT_HTML, FMT_HTMLHREF, FMT_RTF, FMT_OSIS, FMT_WEBIF, FMT_TEI};
 enum {ENC_UNKNOWN = 0, ENC_LATIN1, ENC_UTF8, ENC_SCSU, ENC_UTF16, ENC_RTF, ENC_HTML};
 
 SWORD_NAMESPACE_END

Modified: trunk/include/markupfiltmgr.h
===================================================================
--- trunk/include/markupfiltmgr.h	2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/include/markupfiltmgr.h	2006-07-06 04:34:59 UTC (rev 1942)
@@ -36,6 +36,7 @@
         SWFilter* fromgbf;
         SWFilter* fromplain;
         SWFilter* fromosis;
+        SWFilter* fromtei;
 
 				/**
 	 			* current markup value

Modified: trunk/include/swmgr.h
===================================================================
--- trunk/include/swmgr.h	2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/include/swmgr.h	2006-07-06 04:34:59 UTC (rev 1942)
@@ -106,6 +106,7 @@
 	SWFilter *gbfplain;
 	SWFilter *thmlplain;
 	SWFilter *osisplain;
+	SWFilter *teiplain;
 	SWOptionFilter *transliterator;
 	FilterList cleanupFilters;
 	StringList options;

Added: trunk/include/teiplain.h
===================================================================
--- trunk/include/teiplain.h	2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/include/teiplain.h	2006-07-06 04:34:59 UTC (rev 1942)
@@ -0,0 +1,49 @@
+/******************************************************************************
+ *
+ * $Id$
+ *
+ * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org)
+ *	CrossWire Bible Society
+ *	P. O. Box 2528
+ *	Tempe, AZ  85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#ifndef TEIPLAIN_H
+#define TEIPLAIN_H
+
+#include <swbasicfilter.h>
+#include <utilxml.h>
+
+SWORD_NAMESPACE_START
+
+/** Filter that converts TEI markup to plain text.
+ */
+class SWDLLEXPORT TEIPlain : public SWBasicFilter {
+public:
+	TEIPlain();
+protected:
+	// user-data type handed out by createUserData()
+	class MyUserData : public BasicFilterUserData {
+	public:
+		MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {}
+		SWBuf w;
+		XMLTag tag;
+	};
+	virtual BasicFilterUserData *createUserData(const SWModule *module, const SWKey *key) {
+		return new MyUserData(module, key);
+	}
+	virtual bool handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData);
+};
+
+SWORD_NAMESPACE_END
+#endif

Added: trunk/include/teirtf.h
===================================================================
--- trunk/include/teirtf.h	2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/include/teirtf.h	2006-07-06 04:34:59 UTC (rev 1942)
@@ -0,0 +1,50 @@
+/******************************************************************************
+ *
+ * $Id$
+ *
+ * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org)
+ *	CrossWire Bible Society
+ *	P. O. Box 2528
+ *	Tempe, AZ  85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#ifndef TEIRTF_H
+#define TEIRTF_H
+
+#include <swbasicfilter.h>
+
+SWORD_NAMESPACE_START
+
+/** Filter that converts TEI markup to RTF.
+ */
+class SWDLLEXPORT TEIRTF : public SWBasicFilter {
+public:
+	TEIRTF();
+protected:
+	// user-data type handed out by createUserData(); its constructor is
+	// only declared here
+	class MyUserData : public BasicFilterUserData {
+	public:
+		MyUserData(const SWModule *module, const SWKey *key);
+		bool BiblicalText;
+		SWBuf w;
+		SWBuf version;
+	};
+	virtual BasicFilterUserData *createUserData(const SWModule *module, const SWKey *key) {
+		return new MyUserData(module, key);
+	}
+	virtual bool handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData);
+};
+
+SWORD_NAMESPACE_END
+#endif

Modified: trunk/lib/bcppmake/libsword.bpf
===================================================================
--- trunk/lib/bcppmake/libsword.bpf	2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/lib/bcppmake/libsword.bpf	2006-07-06 04:34:59 UTC (rev 1942)
@@ -144,6 +144,8 @@
 USEUNIT("..\..\src\utilfuns\ftpparse.c");
 USEUNIT("..\..\src\modules\filters\osismorphsegmentation.cpp");
 USEUNIT("..\..\src\keys\versetreekey.cpp");
+USEUNIT("..\..\src\modules\filters\teiplain.cpp");
+USEUNIT("..\..\src\modules\filters\teirtf.cpp");
 //---------------------------------------------------------------------------
 #define Library
 

Modified: trunk/lib/bcppmake/libsword.bpr
===================================================================
--- trunk/lib/bcppmake/libsword.bpr	2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/lib/bcppmake/libsword.bpr	2006-07-06 04:34:59 UTC (rev 1942)
@@ -114,7 +114,8 @@
       ..\..\src\utilfuns\zlib\adler32.obj 
       ..\..\src\modules\filters\papyriplain.obj ..\..\src\utilfuns\ftpparse.obj 
       ..\..\src\modules\filters\osismorphsegmentation.obj 
-      ..\..\src\keys\versetreekey.obj"/>
+      ..\..\src\keys\versetreekey.obj ..\..\src\modules\filters\teiplain.obj 
+      ..\..\src\modules\filters\teirtf.obj"/>
     <RESFILES value=""/>
     <IDLFILES value=""/>
     <IDLGENFILES value=""/>

Modified: trunk/src/mgr/markupfiltmgr.cpp
===================================================================
--- trunk/src/mgr/markupfiltmgr.cpp	2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/src/mgr/markupfiltmgr.cpp	2006-07-06 04:34:59 UTC (rev 1942)
@@ -21,6 +21,8 @@
 
 #include <thmlplain.h>
 #include <gbfplain.h>
+#include <osisplain.h>
+#include <teiplain.h>
 #include <thmlgbf.h>
 #include <gbfthml.h>
 #include <thmlhtml.h>
@@ -33,6 +35,7 @@
 #include <gbfosis.h>
 #include <thmlosis.h>
 #include <osisrtf.h>
+#include <teirtf.h>
 #include <osisosis.h>
 #include <osishtmlhref.h>
 #include <gbfwebif.h>
@@ -76,6 +79,8 @@
                 delete (fromplain);
         if (fromosis)
                 delete (fromosis);
+        if (fromtei)
+                delete (fromtei);
 }
 
 /******************************************************************************
@@ -94,6 +99,7 @@
                 SWFilter * oldthml = fromthml;
                 SWFilter * oldgbf = fromgbf;
                 SWFilter * oldosis = fromosis;
+                SWFilter * oldtei = fromtei;
 
                 CreateFilters(markup);
 
@@ -159,6 +165,21 @@
                                         }
                                         break;
                                 }
+                        case FMT_TEI:
+                                if (oldtei != fromtei) {
+                                        if (oldtei) {
+                                                if (!fromtei) {
+                                                        module->second->RemoveRenderFilter(oldtei);
+                                                }
+                                                else {
+                                                        module->second->ReplaceRenderFilter(oldtei, fromtei);
+                                                }
+                                        }
+                                        else if (fromtei) {
+                                                module->second->AddRenderFilter(fromtei);
+                                        }
+                                        break;
+                                }
                         }
 
                 if (oldthml)
@@ -169,6 +190,8 @@
                         delete oldplain;
                 if (oldosis)
                         delete oldosis;
+                if (oldtei)
+                        delete oldtei;
         }
         return markup;
 }
@@ -191,6 +214,10 @@
                 if (fromosis)
                         module->AddRenderFilter(fromosis);
                 break;
+        case FMT_TEI:
+                if (fromtei)
+                        module->AddRenderFilter(fromtei);
+                break;
         }
 }
 
@@ -201,50 +228,65 @@
                         fromplain = NULL;
                         fromthml = new ThMLPlain();
                         fromgbf = new GBFPlain();
-                        fromosis = NULL;
+                        fromosis = new OSISPlain();
+                        fromtei = new TEIPlain();  // heap-allocated like the other filters; fromtei is an SWFilter*
                         break;
                 case FMT_THML:
                         fromplain = NULL;
                         fromthml = NULL;
                         fromgbf = new GBFThML();
                         fromosis = NULL;
+                        fromtei = NULL;
                         break;
                 case FMT_GBF:
                         fromplain = NULL;
                         fromthml = new ThMLGBF();
                         fromgbf = NULL;
                         fromosis = NULL;
+                        fromtei = NULL;
                         break;
                 case FMT_HTML:
                         fromplain = new PLAINHTML();
                         fromthml = new ThMLHTML();
                         fromgbf = new GBFHTML();
                         fromosis = NULL;
+                        fromtei = NULL;
                         break;
                 case FMT_HTMLHREF:
                         fromplain = new PLAINHTML();
                         fromthml = new ThMLHTMLHREF();
                         fromgbf = new GBFHTMLHREF();
                         fromosis = new OSISHTMLHREF();
+                        fromtei = NULL;
                         break;
                 case FMT_RTF:
                         fromplain = NULL;
                         fromthml = new ThMLRTF();
                         fromgbf = new GBFRTF();
                         fromosis = new OSISRTF();
+                        fromtei = new TEIRTF();
                         break;
                 case FMT_OSIS:
                         fromplain = NULL;
                         fromthml = new ThMLOSIS();
                         fromgbf = new GBFOSIS();
                         fromosis = new OSISOSIS();
+                        fromtei = NULL;
                         break;
                 case FMT_WEBIF:
                         fromplain = NULL;
                         fromthml = new ThMLWEBIF();
                         fromgbf = new GBFWEBIF();
                         fromosis = new OSISWEBIF();
+                        fromtei = NULL;
                         break;
+                case FMT_TEI:  // no conversion filter is created for this markup: every slot stays NULL
+                        fromplain = NULL;
+                        fromthml = NULL;
+                        fromgbf = NULL;
+                        fromosis = NULL;
+                        fromtei = NULL;
+                        break;
                 }
 
 }

Modified: trunk/src/mgr/swmgr.cpp
===================================================================
--- trunk/src/mgr/swmgr.cpp	2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/src/mgr/swmgr.cpp	2006-07-06 04:34:59 UTC (rev 1942)
@@ -42,6 +42,7 @@
 #include <gbfplain.h>
 #include <thmlplain.h>
 #include <osisplain.h>
+#include <teiplain.h>
 #include <papyriplain.h>
 #include <gbfstrongs.h>
 #include <gbffootnotes.h>
@@ -242,6 +243,9 @@
 
 	osisplain = new OSISPlain();
 	cleanupFilters.push_back(osisplain);
+
+	teiplain = new TEIPlain();
+	cleanupFilters.push_back(teiplain);
 //#endif
 }
 
@@ -713,6 +717,8 @@
 		markup = FMT_THML;
 	else if (!stricmp(sourceformat.c_str(), "OSIS"))
 		markup = FMT_OSIS;
+	else if (!stricmp(sourceformat.c_str(), "TEI"))
+		markup = FMT_TEI;
 	else
 		markup = FMT_GBF;
 
@@ -986,6 +992,9 @@
 	else if (!stricmp(sourceformat.c_str(), "OSIS")) {
 		module->AddStripFilter(osisplain);
 	}
+	else if (!stricmp(sourceformat.c_str(), "TEI")) {
+		module->AddStripFilter(teiplain);
+	}
 
 	if (filterMgr)
 		filterMgr->AddStripFilters(module, section);

Added: trunk/src/modules/filters/teiplain.cpp
===================================================================
--- trunk/src/modules/filters/teiplain.cpp	2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/src/modules/filters/teiplain.cpp	2006-07-06 04:34:59 UTC (rev 1942)
@@ -0,0 +1,116 @@
+/***************************************************************************
+                     teiplain.cpp  -  TEI to Plaintext filter
+                             -------------------
+    begin                : 2006-07-05
+    copyright            : 2006 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <teiplain.h>
+#include <ctype.h>
+
+SWORD_NAMESPACE_START
+
+// Sets "<"/">" as token delimiters and "&"/";" as escape delimiters, makes
+// token and escape matching case-sensitive, and registers five XML entities.
+TEIPlain::TEIPlain() {
+	static const char *const xmlEntities[][2] = {
+		{"amp", "&"}, {"apos", "'"}, {"lt", "<"}, {"gt", ">"}, {"quot", "\""}
+	};
+
+	setTokenStart("<");
+	setTokenEnd(">");
+	setEscapeStart("&");
+	setEscapeEnd(";");
+
+	setEscapeStringCaseSensitive(true);
+	for (unsigned int i = 0; i < sizeof(xmlEntities) / sizeof(xmlEntities[0]); ++i)
+		addEscapeStringSubstitute(xmlEntities[i][0], xmlEntities[i][1]);
+	setTokenCaseSensitive(true);
+}
+
+
+/**
+ * Renders a single TEI tag as plain text.
+ *
+ * Registered token substitutions are tried first. Otherwise the token is
+ * parsed as an XML tag and text is appended to buf for the elements handled
+ * below; any other element is reported as unhandled.
+ * @param buf       output buffer that receives the generated text
+ * @param token     token to render; parsed with XMLTag
+ * @param userData  filter state; only supressAdjacentWhitespace is written
+ * @return true if the token was handled, false if it was not recognised
+ */
+bool TEIPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	if (substituteToken(buf, token))
+		return true;	// a simple substitution needs no further processing
+
+	XMLTag tag(token);
+	const bool endTag = tag.isEndTag();
+	const bool startTag = !endTag && !tag.isEmpty();	// start tag of a non-empty element
+
+	// <p> paragraph tag
+	if (!strcmp(tag.getName(), "p")) {
+		if (startTag)
+			buf += "\n";
+		else {
+			// end tag emits one newline, an empty <p/> marker emits two
+			buf += endTag ? "\n" : "\n\n";
+			userData->supressAdjacentWhitespace = true;
+		}
+	}
+
+	// <entryFree> prints its n attribute, if any
+	else if (!strcmp(tag.getName(), "entryFree")) {
+		if (startTag) {
+			SWBuf n = tag.getAttribute("n");
+			if (n != "") {
+				buf += n;
+				buf += ". ";
+			}
+		}
+	}
+
+	// <sense> prints its n attribute, if any; </sense> ends the line
+	else if (!strcmp(tag.getName(), "sense")) {
+		if (startTag) {
+			SWBuf n = tag.getAttribute("n");
+			if (n != "") {
+				buf += n;
+				buf += ". ";
+			}
+		}
+		else if (endTag) {
+			buf += "\n";
+		}
+	}
+
+	// <div> opens with three newlines
+	else if (!strcmp(tag.getName(), "div")) {
+		if (startTag)
+			buf.append("\n\n\n");
+	}
+
+	// <etym> is wrapped in square brackets
+	else if (!strcmp(tag.getName(), "etym")) {
+		if (startTag)
+			buf += "[";
+		else if (endTag)
+			buf += "]";
+	}
+	else
+		return false;  // not an element this filter renders
+	return true;
+}
+
+
+SWORD_NAMESPACE_END

Added: trunk/src/modules/filters/teirtf.cpp
===================================================================
--- trunk/src/modules/filters/teirtf.cpp	2006-07-04 20:42:39 UTC (rev 1941)
+++ trunk/src/modules/filters/teirtf.cpp	2006-07-06 04:34:59 UTC (rev 1942)
@@ -0,0 +1,207 @@
+/***************************************************************************
+                     teirtf.cpp  -  TEI to RTF filter
+                             -------------------
+    begin                : 2006-07-03
+    copyright            : 2006 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <teirtf.h>
+#include <utilxml.h>
+#include <swmodule.h>
+
+SWORD_NAMESPACE_START
+
+
+// Stores the module's name and whether its type is "Biblical Texts" (false without a module).
+TEIRTF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+	BiblicalText = false;
+	if (!module) return;
+	version = module->Name();
+	BiblicalText = (strcmp(module->Type(), "Biblical Texts") == 0);
+}
+
+
+// Sets "<"/">" as token delimiters and "&"/";" as escape delimiters, makes
+// token and escape matching case-sensitive, and registers five XML entities.
+TEIRTF::TEIRTF() {
+	static const char *const xmlEntities[][2] = {
+		{"amp", "&"}, {"apos", "'"}, {"lt", "<"}, {"gt", ">"}, {"quot", "\""}
+	};
+
+	setTokenStart("<");
+	setTokenEnd(">");
+	setEscapeStart("&");
+	setEscapeEnd(";");
+
+	setEscapeStringCaseSensitive(true);
+	for (unsigned int i = 0; i < sizeof(xmlEntities) / sizeof(xmlEntities[0]); ++i)
+		addEscapeStringSubstitute(xmlEntities[i][0], xmlEntities[i][1]);
+	setTokenCaseSensitive(true);
+}
+
+
+/**
+ * Renders a single TEI tag as RTF.
+ *
+ * Registered token substitutions are tried first. Otherwise the token is
+ * parsed as an XML tag and, for the elements handled below, RTF control words
+ * are appended to buf; any other element is reported as unhandled.
+ *
+ * @param buf       output buffer that receives the generated RTF
+ * @param token     token to render; parsed with XMLTag
+ * @param userData  filter state; only supressAdjacentWhitespace is written
+ * @return true if the token was handled, false if it was not recognised
+ */
+bool TEIRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	// a simple substitution needs no further processing
+	if (substituteToken(buf, token))
+		return true;
+
+	XMLTag tag(token);
+	const bool endTag = tag.isEndTag();
+	const bool startTag = !endTag && !tag.isEmpty();	// start tag of a non-empty element
+
+	// <p> paragraph tag
+	if (!strcmp(tag.getName(), "p")) {
+		if (startTag) {
+			buf += "{\\par}";
+		}
+		else {
+			// end tag emits one break, an empty <p/> marker emits two
+			buf += endTag ? "{\\par}" : "{\\par\\par}";
+			userData->supressAdjacentWhitespace = true;
+		}
+	}
+
+	// <hi> highlighted text, styled by its rend attribute
+	else if (!strcmp(tag.getName(), "hi")) {
+		if (startTag) {
+			SWBuf rend = tag.getAttribute("rend");
+			if (rend == "ital")
+				buf += "{\\i1 ";
+			else if (rend == "bold")
+				buf += "{\\b1 ";
+			else if (rend == "sup")
+				buf += "{\\super ";
+			else {
+				// unknown or missing rend: still open a group, so the "}"
+				// written for </hi> cannot close an enclosing group
+				buf += "{";
+			}
+		}
+		else if (endTag) {
+			buf += "}";
+		}
+	}
+
+	// <entryFree> prints its n attribute, if any, in bold
+	else if (!strcmp(tag.getName(), "entryFree")) {
+		if (startTag) {
+			SWBuf n = tag.getAttribute("n");
+			if (n != "") {
+				buf += "{\\b1 ";
+				buf += n;
+				buf += ". }";
+			}
+		}
+	}
+
+	// <sense> prints its n attribute, if any, in bold after a paragraph break
+	else if (!strcmp(tag.getName(), "sense")) {
+		if (startTag) {
+			SWBuf n = tag.getAttribute("n");
+			if (n != "") {
+				buf += "{\\par\\b1 ";
+				buf += n;
+				buf += ". }";
+			}
+		}
+		else if (endTag) {
+			buf += "\\par ";
+		}
+	}
+
+	// <div> opens with two paragraph breaks followed by \pard
+	else if (!strcmp(tag.getName(), "div")) {
+		if (startTag)
+			buf.append("\\par\\par\\pard ");
+	}
+
+	// <pos>, <gen>, <case>, <tr>, <number>, <mood>: each is wrapped in an
+	// italic group
+	// <pos>
+	else if (!strcmp(tag.getName(), "pos")) {
+		if (startTag)
+			buf += "{\\i1 ";
+		else if (endTag)
+			buf += "}";
+	}
+
+	// <gen>
+	else if (!strcmp(tag.getName(), "gen")) {
+		if (startTag)
+			buf += "{\\i1 ";
+		else if (endTag)
+			buf += "}";
+	}
+
+	// <case>
+	else if (!strcmp(tag.getName(), "case")) {
+		if (startTag)
+			buf += "{\\i1 ";
+		else if (endTag)
+			buf += "}";
+	}
+
+	// <tr>
+	else if (!strcmp(tag.getName(), "tr")) {
+		if (startTag)
+			buf += "{\\i1 ";
+		else if (endTag)
+			buf += "}";
+	}
+
+	// <number>
+	else if (!strcmp(tag.getName(), "number")) {
+		if (startTag)
+			buf += "{\\i1 ";
+		else if (endTag)
+			buf += "}";
+	}
+
+	// <mood>
+	else if (!strcmp(tag.getName(), "mood")) {
+		if (startTag)
+			buf += "{\\i1 ";
+		else if (endTag)
+			buf += "}";
+	}
+
+	// <etym> is wrapped in square brackets
+	else if (!strcmp(tag.getName(), "etym")) {
+		if (startTag)
+			buf += "[";
+		else if (endTag)
+			buf += "]";
+	}
+
+	else {
+		return false;  // we still didn't handle token
+	}
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
+



More information about the sword-cvs mailing list