# -*- coding: utf-8 -*-
"""Token display helpers for Greek text, plus a thin CollateX wrapper.

Every transform in ``ApplySettings`` takes a token dict, rewrites its
``'interface'`` string and returns the same dict.

NOTE(review): this module was restored from a copy in which the whole file
had been collapsed onto two lines and a span of text had been lost (from the
middle of the ``SUPPLIED_END`` pattern up to the body of
``lower_case_greek``).  Anything marked "reconstructed" below is a best
guess and must be checked against the canonical file; the lost span may also
have contained further definitions that could not be recovered.
"""
import re
import sys
# from collatex import *  # python collatex


def eprint(*args, **kwargs):
    """Print to standard error; takes the same arguments as ``print``."""
    print(*args, file=sys.stderr, **kwargs)


# An opening square bracket that is not directly followed by a digit.
SUPPLIED_START = re.compile(r'\[(?!\d)')
# NOTE(review): reconstructed as the mirror image of SUPPLIED_START (only the
# leading "(?" of this pattern survived): a closing square bracket that is
# not directly preceded by a digit.
SUPPLIED_END = re.compile(r'(?<!\d)\]')
# NOTE(review): reconstructed - presumably the combining dot below used to
# mark unclear letters; confirm against the canonical file.
UNCLEARS = re.compile('\u0323')
# NOTE(review): reconstructed - presumably the apostrophe characters used for
# elision; confirm the exact character set.
APOSTROPHES = re.compile("[\u2019']")
# NOTE(review): reconstructed - a lower-case non-final sigma at the very end
# of the string; confirm the exact pattern.
ENDING_NOT_FINAL_SIGMA = re.compile(r'σ$')

# Iota/upsilon with diaeresis mapped to the plain vowel.
# NOTE(review): the two visually identical keys per vowel in the original map
# are assumed to be the precomposed and the decomposed (vowel + U+0308)
# spellings - confirm.
_UNDIAERESIS_MAP = {
    '\u03ca': '\u03b9',        # precomposed iota with diaeresis
    '\u03b9\u0308': '\u03b9',  # iota + combining diaeresis
    '\u03cb': '\u03c5',        # precomposed upsilon with diaeresis
    '\u03c5\u0308': '\u03c5',  # upsilon + combining diaeresis
}
# Derived from the map so that every possible match has a replacement
# (the original pattern definition was lost).
DIARESES = re.compile('|'.join(re.escape(key) for key in _UNDIAERESIS_MAP))


class ApplySettings(object):
    """Display-setting transforms applied to a token's ``'interface'`` string."""

    def lower_case_greek(self, token):
        """Lower-case the interface text and turn a trailing sigma into final sigma.

        An empty interface string is returned untouched.
        """
        # NOTE(review): the original guard was lost apart from "> 0:";
        # presumably ``len(token['interface']) > 0``.
        if token['interface']:
            token['interface'] = ENDING_NOT_FINAL_SIGMA.sub(
                'ς', token['interface'].lower())
        return token

    def hide_supplied_text(self, token):
        """Strip the square brackets matched by SUPPLIED_START / SUPPLIED_END.

        Only the brackets are removed; the text between them is kept, and
        brackets adjacent to a digit on the inner side are left in place.
        """
        without_closing = SUPPLIED_END.sub('', token['interface'])
        token['interface'] = SUPPLIED_START.sub('', without_closing)
        return token

    def hide_unclear_text_legacy(self, token):
        """Remove every underscore from the interface text."""
        token['interface'] = token['interface'].replace('_', '')
        return token

    def hide_unclear_text(self, token):
        """Remove every match of UNCLEARS from the interface text."""
        token['interface'] = UNCLEARS.sub('', token['interface'])
        return token

    def hide_apostrophes(self, token):
        """Remove every match of APOSTROPHES from the interface text."""
        token['interface'] = APOSTROPHES.sub('', token['interface'])
        return token

    def hide_diaeresis(self, token):
        """Replace iota/upsilon carrying a diaeresis with the plain vowel."""
        token['interface'] = DIARESES.sub(
            lambda match: _UNDIAERESIS_MAP[match.group()], token['interface'])
        return token

    def show_punctuation(self, token):
        """Wrap the interface text in ``pc_before`` / ``pc_after`` when present."""
        if 'pc_before' in token:
            token['interface'] = '%s%s' % (token['pc_before'], token['interface'])
        if 'pc_after' in token:
            token['interface'] = '%s%s' % (token['interface'], token['pc_after'])
        return token


class PrepareData(object):
    """Builds normalised strings from raw text and from token dicts."""

    def __init__(self):
        self.settingsApplier = ApplySettings()

    def prepare_t(self, string, display_settings=None, display_settings_config=None):
        """Return ``string`` after the fixed chain of display transforms.

        The text is lower-cased, then supplied-text brackets, unclear
        markers, apostrophes and diaereses are removed, in that order.
        ``display_settings`` and ``display_settings_config`` are accepted
        but not consulted.
        """
        applier = self.settingsApplier
        # turn it into a dictionary so we can use the token-based transforms
        token = {'interface': string}
        for transform in (
            applier.lower_case_greek,
            applier.hide_supplied_text,
            applier.hide_unclear_text,
            applier.hide_apostrophes,
            applier.hide_diaeresis,
        ):
            token = transform(token)
        return token['interface']

    def set_rule_string(self, token, display_settings=None, display_settings_config=None):
        """Store a normalised form of the token's word under ``'rule_string'``.

        The word is ``token['n']`` when present; otherwise
        ``token['expanded']`` when ``'expand_abbreviations'`` is among
        ``display_settings`` and the token has that key; otherwise
        ``token['original']``.  It is lower-cased and stripped of apostrophes
        and diaereses.  The same token dict is returned.

        Raises:
            KeyError: if none of the candidate keys is available.
        """
        if display_settings is None:
            display_settings = {}
        if 'n' in token:
            word = token['n']
        elif 'expand_abbreviations' in display_settings and 'expanded' in token:
            word = token['expanded']
        else:
            word = token['original']
        applier = self.settingsApplier
        # work on a scratch token so the caller's 'interface' is not touched
        scratch = {'interface': word}
        for transform in (
            applier.lower_case_greek,
            applier.hide_apostrophes,
            applier.hide_diaeresis,
        ):
            scratch = transform(scratch)
        token['rule_string'] = scratch['interface']
        return token


class Collation(object):
    """Thin wrapper around the CollateX Python ``collate`` function."""

    def call_collatex(self, data, options):
        """Collate ``data``, echo the JSON result to stderr and return it.

        ``options`` is accepted but not used.
        """
        # this is the collateX python code
        # NOTE(review): ``collate`` is not imported in this module (the
        # collatex import at the top is commented out), so this call raises
        # NameError unless the name is provided some other way.
        collation_output = collate(data, output='json', layout="horizontal", segmentation=False)
        eprint(collation_output)
        return collation_output