[sword-svn] r563 - trunk/migratetags/matchers
scribe at crosswire.org
scribe at crosswire.org
Thu May 25 04:17:14 EDT 2023
Author: scribe
Date: 2023-05-25 04:17:14 -0400 (Thu, 25 May 2023)
New Revision: 563
Modified:
trunk/migratetags/matchers/gntmatcher.h
Log:
added regularization and sigma normalization
Modified: trunk/migratetags/matchers/gntmatcher.h
===================================================================
--- trunk/migratetags/matchers/gntmatcher.h 2023-04-27 17:45:35 UTC (rev 562)
+++ trunk/migratetags/matchers/gntmatcher.h 2023-05-25 08:17:14 UTC (rev 563)
@@ -1,15 +1,21 @@
#include "matcher.h"
#include <utf8greekaccents.h>
+#include <map>
#ifndef gntmatcher_h
#define gntmatcher_h
+using std::map;
+
class GNTMatcher : public Matcher {
UTF8GreekAccents sanitizeGreekAccentFilter;
+ map<SWBuf, SWBuf> globalRegs;
public:
GNTMatcher() : sanitizeGreekAccentFilter() {
sanitizeGreekAccentFilter.setOptionValue("off");
+ globalRegs["ΘΣ"] = "ΘΕΟΣ";
+ globalRegs["ΚΥ"] = "ΚΥΡΙΟΥ";
}
// Compares 2 words and tries to give a percentage assurance of a match
@@ -109,7 +115,6 @@
SWBuf t1 = word;
// remove greek accents
sanitizeGreekAccentFilter.processText(t1);
- t1.toUpper();
// remove ignoreSeries characters
SWBuf o = t1;
@@ -122,8 +127,14 @@
SWBuf checkChar;
getUTF8FromUniChar(ch, &checkChar);
if (checkChar != " " && strstr(ignoreSeries, checkChar.c_str())) continue;
+ if (checkChar == "ϲ") checkChar = "σ";
+ if (checkChar == "ς") checkChar = "σ";
t1.append(checkChar);
}
+ t1.toUpper();
+ if (globalRegs.find(t1) != globalRegs.end()) {
+ t1 = globalRegs[t1];
+ }
return t1;
}
More information about the sword-cvs mailing list