<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%> <%@ page trimDirectiveWhitespaces="true" %> <%@ page import="org.crosswire.utils.Sessions" %> <%@ page import="java.util.Vector" %> <%@ page import="java.util.HashSet" %> <%@ page import="eu.interedition.collatex.simple.SimpleWitness" %> <%@ page import="eu.interedition.collatex.Witness" %> <%@ page import="eu.interedition.collatex.Token" %> <%@ page import="eu.interedition.collatex.io.*" %> <%@ page import="eu.interedition.collatex.jung.JungVariantGraph" %> <%@ page import="eu.interedition.collatex.simple.SimpleToken" %> <%@ page import="eu.interedition.collatex.simple.SimpleTokenNormalizers" %> <%@ page import="eu.interedition.collatex.simple.SimpleVariantGraphSerializer" %> <%@ page import="eu.interedition.collatex.simple.SimpleCollation" %> <%@ page import="eu.interedition.collatex.CollationAlgorithm" %> <%@ page import="eu.interedition.collatex.CollationAlgorithmFactory" %> <%@ page import="eu.interedition.collatex.matching.EqualityTokenComparator" %> <%@ page import="eu.interedition.collatex.VariantGraph" %> <%@ page import="eu.interedition.collatex.util.VariantGraphRanking" %> <%@ page import="com.google.common.collect.RowSortedTable" %> <%@ page import="java.io.BufferedReader" %> <%@ page import="java.io.InputStreamReader" %> <%@ page import="java.lang.StringBuilder" %> <%@ page import="java.util.Set" %> <%@ page import="java.util.List" %> <%@ page import="java.util.Iterator" %> <%@ page import="java.util.ArrayList" %> <%@ page import="java.util.Collections" %> <%@ page import="java.util.Arrays" %> <%@ page import="java.util.Comparator" %> <%@ page import="java.util.StringTokenizer" %> <%@ page import="com.google.common.base.Function" %> <%@ page import="javax.xml.stream.XMLOutputFactory" %> <%@ page import="javax.xml.stream.XMLStreamWriter" %> <%@ page import="java.io.StringWriter" %> <%@ page import="java.util.HashMap" %> <%@ page import="java.util.Map" %> <%@ page 
import="org.crosswire.utils.HTTPUtils" %> <%@ page import="org.crosswire.utils.Utils" %> <%@ page import="org.crosswire.sword.keys.VerseKey" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Transcription" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Transcription.WitnessReading" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Transcription.Convert" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Document" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Regularization" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Regularization.RegularizationRule" %> <%@ page import="org.apache.log4j.Logger" %> <%@ page import="org.json.JSONObject" %> <%@ page import="org.json.JSONArray" %> <%@ page import="org.crosswire.webtools.annotation.*" %> <%@ page import="org.crosswire.webtools.*" %> <%@ page import="javax.validation.constraints.Pattern" %> <%! @Description(value = "Collate text segments", name = "collate") public static class MyParameters extends Parameters<MyParameters> { @Description(value = "witness text to include in collation. 
can repeat or can be numbered", example = "w=text or w1=text") public String w = null; @Description(value = "custom witness label to show for a witness, can repeat corresponding to {w} parameters or can be numbered.", example = "l=P52 or l1=P52") public String l = null; @Pattern(regexp = "^(atable|csv|graphml|dot|graph|tei|apptext|apphtml)?$", message = "Valid response formats: \"atable\", \"graph\", \"csv\", \"graphml\", \"dot\", \"tei\", \"apptext\", or \"apphtml\"") @Description(value = "specify the result format: atable, graph, csv, graphml, dot, tei, apptext, apphtml", defaultValue = "atable", example = "graph") public String format = null; @Pattern(regexp = "^(dekker|medite|needleman-wunsch)?$", message = "Valid response formats: \"dekker\", \"medite\", or \"needleman-wunsch\"") @Description(value = "one of: dekker, medite, needleman-wunsch", defaultValue = "dekker", example = "medite") public String algorithm = null; @Description(value = "collate witnesses which are members of a document group (-1 : all extant)", example = "123") public Integer documentGroupID = null; @Description(value = "which verse to collate (only applies to witnesses which are looked up (e.g., from a documentGroupID)", example = "Jn.2.2") public String indexContent = null; @Description(value = "include additional verses after indexContent parameter", defaultValue = "0", example = "1") public Integer extraVerses = 0; @Description(value = "Base text module name to include in the collation", example = "NA28") public String baseText = null; @Description(value = "Additional module name to include in the collation. Can repeat", example = "TR") public String loadModule = null; @Description(value = "Witness to include in the collation. 
Can repeat.", example = "10075") public Integer loadDocID = null; @Description(value = "Apply a general regularization to witnesses, ignoring supplied marks", defaultValue = "false", example = "true") public Boolean ignoreSupplied = false; @Description(value = "Apply a general regularization to witnesses, ignoring unclear marks", defaultValue = "false", example = "true") public Boolean ignoreUnclear = false; @Description(value = "Apply a general regularization to witnesses, ignoring punctuation", defaultValue = "false", example = "true") public Boolean ignorePunctuation = false; @Description(value = "Apply a general regularization to witnesses, ignoring Greek accent marks", defaultValue = "false", example = "true") public Boolean ignoreAccents = false; @Description(value = "Restrict witnesses to a given language", example = "grc") public String lang = null; @Description(value = "Apply regularization rules from this user, can be repeated, appended with '+' only localRules, appended with '*' only globalRules", example = "jsmith+") public String regUserID = null; @Description(value = "If set to a username and this username has a transcription for this verse, use the user's transcription over the global transcription", example = "jsmith") public String preferUser = null; // deprecated parameters @Description(value = "internal") public String biblicalContent = null; @Description(value = "internal") public String verse = null; @Override protected Map<String, String> getRequestParameters() { Map<String, String> requestParameterMap = super.getRequestParameters(); // clear out our w1..n and l1..n parameters before validation final java.util.regex.Pattern wSeries = java.util.regex.Pattern.compile("^[wl]\\d+$"); for (Iterator<Map.Entry<String, String>> it = requestParameterMap.entrySet().iterator(); it.hasNext(); ) { Map.Entry<String, String> p = it.next(); if (wSeries.matcher(p.getKey()).matches()) { // getLogger().info("REMOVING PARAMETER: " + p.getKey()); it.remove(); } // 
else getLogger().info("NOT REMOVING PARAMETER: " + p.getKey()); } return requestParameterMap; } @Override protected void afterLoad() { if (indexContent == null) indexContent = biblicalContent; if (indexContent == null) indexContent = verse; } @Override protected void customValidation() { } } %> <% // check for json and read body must be first thing or our body is read by getParameter calls //dumpCallInfo(request, params.getLogger()); List<SimpleWitness> witnesses = new ArrayList<SimpleWitness>(); JSONObject requestJSON = null; if ("application/json".equals(request.getHeader("accept"))) { StringBuilder buffer = new StringBuilder(); BufferedReader reader = request.getReader(); String line; while ((line = reader.readLine()) != null) { buffer.append(line); } String data = buffer.toString(); try { WordWithParallelCombiningHintsTokenizer tokenizer = new WordWithParallelCombiningHintsTokenizer(); requestJSON = new JSONObject(data); JSONArray witnessesNode = requestJSON.getJSONArray("witnesses"); //params.getLogger().info("witnessesNode="+witnessesNode); witnesses = new ArrayList<SimpleWitness>(witnessesNode.length()); for (int i = 0; i < witnessesNode.length(); ++i) { JSONObject witnessObject = (JSONObject)witnessesNode.get(i); final SimpleWitness witness = new SimpleWitness(witnessObject.getString("id").trim()); if (witnessObject.has("tokens")) { final JSONArray tokensArray = witnessObject.getJSONArray("tokens"); final List<eu.interedition.collatex.Token> tokens = new ArrayList<eu.interedition.collatex.Token>(tokensArray.length()); for (int j = 0; j < tokensArray.length(); ++j) { final JSONObject tokenObject = (JSONObject)tokensArray.get(j); String tokenContent = tokenObject.has("t")?tokenObject.getString("t"):null; String normalizedTokenContent = tokenObject.has("n")?tokenObject.getString("n"):null; if (normalizedTokenContent == null) normalizedTokenContent = SimpleWitness.TOKEN_NORMALIZER.apply(tokenContent); tokens.add(new SimpleToken(witness, tokenContent, 
normalizedTokenContent) { JSONObject jobj = tokenObject; public String toString() { return jobj.toString(); } }); } witness.setTokens(tokens); } else { final String contentNode = witnessObject.getString("content"); // witness = new SimpleWitness(witnessObject.getString("id").trim(), contentNode, tokenizer, SimpleTokenNormalizers.LC_TRIM_WS)); witness.setTokenContents(tokenizer.apply(contentNode), SimpleTokenNormalizers.LC_TRIM_WS); } witnesses.add(witness); } /* if (witnesses.isEmpty()) { throw new IOException("No witnesses in collation"); } Comparator<Token> tokenComparator = null; final JsonValue tokenComparatorNode = collationObject.get("tokenComparator"); if (tokenComparatorNode != null && tokenComparatorNode.getValueType() == JsonValue.ValueType.OBJECT) { final JsonObject tokenComparatorObject = (JsonObject) tokenComparatorNode; try { if ("levenshtein".equals(tokenComparatorObject.getString("type"))) { final int configuredDistance = tokenComparatorObject.getInt("distance", 0); tokenComparator = new EditDistanceTokenComparator(configuredDistance == 0 ? 
1 : configuredDistance); } } catch (ClassCastException e) { // ignored } } if (tokenComparator == null) { tokenComparator = new EqualityTokenComparator(); } CollationAlgorithm collationAlgorithm = null; final JsonValue collationAlgorithmNode = collationObject.get("algorithm"); if (collationAlgorithmNode != null && collationAlgorithmNode.getValueType() == JsonValue.ValueType.STRING) { final String collationAlgorithmValue = ((JsonString) collationAlgorithmNode).getString(); if ("needleman-wunsch".equalsIgnoreCase(collationAlgorithmValue)) { collationAlgorithm = CollationAlgorithmFactory.needlemanWunsch(tokenComparator); } else if ("gst".equalsIgnoreCase(collationAlgorithmValue)) { collationAlgorithm = CollationAlgorithmFactory.greedyStringTiling(tokenComparator, 2); } else if ("medite".equalsIgnoreCase(collationAlgorithmValue)) { collationAlgorithm = CollationAlgorithmFactory.medite(tokenComparator, SimpleToken.TOKEN_MATCH_EVALUATOR); } } if (collationAlgorithm == null) { collationAlgorithm = CollationAlgorithmFactory.dekker(tokenComparator); } boolean joined = true; try { joined = collationObject.getBoolean("joined", true); } catch (ClassCastException e) { // ignored } if (collationAlgorithm instanceof InspectableCollationAlgorithm) { boolean mergeTranspositions = true; try { mergeTranspositions = collationObject.getBoolean("transpositions", true); } catch (ClassCastException e) { // ignored } ((InspectableCollationAlgorithm) collationAlgorithm).setMergeTranspositions(mergeTranspositions); } return new SimpleCollation(witnesses, collationAlgorithm, joined); */ } catch (Exception e) { e.printStackTrace(); } } %> <%! 
// Collects verbatim witness readings from request parameters.  Two styles are
// accepted: numbered (w1=...&l1=..., w2=..., scanned until the first gap) and
// repeated (w=...&w=...&l=...), with labels paired positionally and defaulting
// to "w"+index when absent.  Per-witness failures are logged and skipped.
public static Vector<WitnessReading> getWitnessReadingsFromParams(HttpServletRequest request) {
    Vector<WitnessReading> retVal = new Vector<WitnessReading>();
    String witnessSegment = null;
    int i = 1;
    // numbered series: w1, w2, ... stops at the first missing index
    for (i = 1; ((witnessSegment = request.getParameter("w"+i)) != null); ++i) {
        try {
            witnessSegment = Transcription.assureUnicode(witnessSegment);
            String label = request.getParameter("l"+i);
            if (label == null) label = "w"+i;
            label = Transcription.assureUnicode(label);
            WitnessReading wr = new WitnessReading(label, witnessSegment.trim());
            retVal.add(wr);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // repeated series: w=...&w=...; 'i' carries on so default labels stay unique
    String w[] = request.getParameterValues("w");
    String l[] = request.getParameterValues("l");
    if (w != null) {
        for (int j = 0; j < w.length; ++j) {
            try {
                witnessSegment = Transcription.assureUnicode(w[j]);
                String label = l != null && l.length > j ? l[j] : null;
                if (label == null) label = "w"+i;
                label = Transcription.assureUnicode(label);
                WitnessReading wr = new WitnessReading(label, witnessSegment.trim());
                retVal.add(wr);
                ++i;
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    return retVal;
}

// Debug helper: logs content type, headers, attributes, and all parameters of
// the incoming request.  Only referenced from a commented-out call above.
public static void dumpCallInfo(HttpServletRequest request, Logger logger) {
    logger.info("collate called...");
    logger.info("request.getContentType: " + request.getContentType());
    logger.info("Headers: ");
    for (Object o: java.util.Collections.list(request.getHeaderNames())) {
        logger.info(o + "=" + request.getHeader(o.toString()));
    }
    logger.info("Attributes: ");
    for (Object o: java.util.Collections.list(request.getAttributeNames())) {
        logger.info(o + "=" + request.getAttribute(o.toString()));
    }
    logger.info("Parameters: ");
    for (Object o: java.util.Collections.list(request.getParameterNames())) {
        for (String v: request.getParameterValues(o.toString())) {
            logger.info(o + "=" + v);
        }
    }
}
%>
<%
// ---- Main request handling: gather witnesses, collate, render per format ----
MyParameters params = new MyParameters().loadFromRequest(request, response, false);
if (params.getErrors().size() == 0) {
    // a JSON body (parsed above) forces the "json" response format
    String format = (requestJSON != null) ? "json" : params.format;
    String loadModule[] = request.getParameterValues("loadModule");
    String loadDocID[] = request.getParameterValues("loadDocID");
    String dotPath = Utils.getSysConfig(session).getProperty("GraphvizCmd");
    String defaultFontSpec = Utils.getSysConfig(session).getProperty("DefaultFontSpec", "GentiumPlus");
    if (dotPath != null) SimpleVariantGraphSerializer.setDotPath(dotPath);
    boolean usage = false;
    boolean givenBaseText = false;
    if (format == null) format = "atable";
    RowSortedTable<Integer, Witness, Set<Token>> table = null;
    VariantGraph graph = null;
    String primaryLang = params.lang;
    Vector<WitnessReading> witnessesFromRequest = getWitnessReadingsFromParams(request);
    // Proceed only if at least one source of witnesses was supplied.
    if (requestJSON != null || !witnessesFromRequest.isEmpty() || params.documentGroupID != null || loadDocID != null || loadModule != null) {
        // Expand indexContent into a range when extraVerses is requested.
        VerseKey verseKey = null;
        if (params.indexContent != null) {
            verseKey = new VerseKey();
            verseKey.setIntros(true);
            verseKey.setText(params.indexContent);
            if (params.extraVerses > 0) {
                VerseKey endKey = new VerseKey();
                endKey.setIntros(true);
                endKey.setText(params.indexContent);
                endKey.increment(params.extraVerses);
                params.indexContent = verseKey.getShortText() + "-" + endKey.getShortText();
            }
        }
        VerseKey vk = new VerseKey();
        vk.setIntros(true);
        Vector<WitnessReading> witnessReadings = new Vector<WitnessReading>();
        // First see if we've been passed any verbatim witness text in w1, w2, w3... params and include those
        witnessReadings.addAll(witnessesFromRequest);
        // Next see if we've been passed a Document Group of manuscript IDs to collate and include those
        if (params.documentGroupID != null && params.indexContent != null) {
            witnessReadings.addAll(Transcription.getWitnessReadings(params.documentGroupID, params.indexContent, !params.ignoreSupplied, !params.ignoreUnclear, !params.ignorePunctuation, !params.ignoreAccents, params.preferUser, params.lang));
        }
        /* for (WitnessReading wr: witnessReadings) { params.getLogger().info(wr.getLabel() + ": " + wr.getText()); } */
        // if we've been given a baseText, then retrieve the reading. If not, then don't retrieve, but set to NA28 as default, in case it is given in our witness list
        params.getLogger().debug("baseText: " + params.baseText);
        if (params.baseText != null) {
            // see if our baseText has already been passed to us
            for (WitnessReading wr : witnessReadings) {
                params.getLogger().debug("checking: " + params.baseText + " == " + wr.getDocumentName() + " || " + wr.getLabel());
                if (params.baseText.equals(wr.getDocumentName()) || params.baseText.equals(wr.getLabel())) {
                    givenBaseText = true;
                    if (!wr.getDocumentName().equals(wr.getLabel())) {
                        // if we've been provided just a document name which has multiple hands, then set to first hand
                        params.baseText = wr.getLabel();
                    }
                    break;
                }
            }
            // if we haven't been passed our baseText but we have a verse, let's try to retrieve the baseText reading
            if (!givenBaseText && params.indexContent != null) {
                witnessReadings.add(loadModuleReading(params.baseText, params.indexContent, !params.ignoreSupplied, !params.ignoreUnclear, !params.ignorePunctuation, !params.ignoreAccents, params.getLogger()));
            }
        }
        else params.baseText = "NA28";
        // Additional modules (e.g., TR) requested by name.
        if (loadModule != null && params.indexContent != null) {
            for (String m : loadModule) {
                witnessReadings.add(loadModuleReading(m, params.indexContent, !params.ignoreSupplied, !params.ignoreUnclear, !params.ignorePunctuation, !params.ignoreAccents, params.getLogger()));
            }
        }
        // Individual documents requested by numeric ID.
        if (loadDocID != null && params.indexContent != null) {
            for (String m : loadDocID) {
                try {
                    Document d = Document.getDocument(Integer.parseInt(m));
                    //params.getLogger().info("loading document: " + d.getGANumber());
                    witnessReadings.addAll(Transcription.getWitnessReadings(new Document[] { d }, params.indexContent, !params.ignoreSupplied, !params.ignoreUnclear, !params.ignorePunctuation, !params.ignoreAccents, params.preferUser));
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            //params.getLogger().info("witnessReadings: " + witnessReadings);
        }
        // Apply per-user regularization rules.  '+' suffix = local rules only,
        // '*' suffix = global rules only, no suffix = both.
        String regUserIDs[] = request.getParameterValues("regUserID");
        if (regUserIDs != null && regUserIDs.length > 0) {
            Vector<RegularizationRule> regRules = new Vector<RegularizationRule>();
            for (String regUser : regUserIDs) {
                boolean global = true;
                boolean local = true;
                if (regUser.endsWith("+")) {
                    global = false;
                    regUser = regUser.substring(0, regUser.length()-1);
                }
                else if (regUser.endsWith("*")) {
                    local = false;
                    regUser = regUser.substring(0, regUser.length()-1);
                }
                params.getLogger().debug("Reg Rules: " + regUser + "; global: " + global + "; local: " + local);
                if (global) regRules.addAll(Arrays.asList(Regularization.getRulesGlobal(regUser, false)));
                if (local) regRules.addAll(Arrays.asList(Regularization.getRulesByVerse(regUser, false, params.indexContent)));
            }
            params.getLogger().debug("Rules count: " + regRules.size());
            // NOTE(review): verseKey is null when indexContent was not supplied;
            // regUserID without indexContent would NPE here — confirm callers.
            for (WitnessReading wr : witnessReadings) {
                wr.setText(Regularization.regularize(verseKey.getHashNumber(), regRules, wr.getText()));
            }
        }
        Vector<String> lacunaWitnesses = new Vector<String>();
        Vector<String> omittedWitnesses = new Vector<String>();
        Vector<String> presentWitnesses = new Vector<String>();
        WordWithParallelCombiningHintsTokenizer tokenizer = new WordWithParallelCombiningHintsTokenizer();
        boolean stripOmitted = false;
        boolean stripLacuna = false;
        // "text" is for legacy purposes; remove when sure it is not used
        if ("apphtml".equals(format) || "text".equals(format) || "apptext".equals(format) || "atable".equals(format) || "csv".equals(format)) {
            stripOmitted = true;
            stripLacuna = true;
        }
        if ("graph".equals(format)) {
            /* stripOmitted = true; stripLacuna = true; */
        }
        // Classify each reading (omitted / lacuna / present) and build the
        // CollateX witness list; stripped readings are tracked but not collated.
        for (WitnessReading wr : witnessReadings) {
            if (primaryLang == null && wr.getDocumentID() > 0) {
                Document d = Document.getDocument(wr.getDocumentID());
                primaryLang = d.getLanguage();
            }
            if ("(([ Omitted ]))".equals(wr.getText())) {
                omittedWitnesses.add(wr.getLabel());
                if (stripOmitted) continue;
            }
            else if ("[lac]".equals(wr.getText())) {
                lacunaWitnesses.add(wr.getLabel());
                if (stripLacuna) continue;
            }
            else if (givenBaseText || !wr.getLabel().equals(params.baseText)) presentWitnesses.add(wr.getLabel());
            witnesses.add(new SimpleWitness(wr.getLabel(), wr.getText(), tokenizer, SimpleTokenNormalizers.LC_TRIM_WS));
        }
        // NOTE(review): requestJSON.getString("algorithm") throws if the JSON
        // body has no "algorithm" member — confirm clients always send it.
        String algorithm = (requestJSON != null) ? requestJSON.getString("algorithm") : params.algorithm;
        CollationAlgorithm collationAlgorithm = null;
        if ("medite".equals(algorithm)) {
            collationAlgorithm = CollationAlgorithmFactory.medite(new EqualityTokenComparator(), SimpleToken.TOKEN_MATCH_EVALUATOR);
        }
        else if ("needleman-wunsch".equals(algorithm)) {
            collationAlgorithm = CollationAlgorithmFactory.needlemanWunsch(new EqualityTokenComparator());
        }
        else {
            // default: dekker
            collationAlgorithm = CollationAlgorithmFactory.dekker(new EqualityTokenComparator());
        }
        boolean joined = false;
        graph = new SimpleCollation(witnesses, collationAlgorithm, joined).collate(new JungVariantGraph());
        /* // from old method, already done in the SimpleCollation class above
        //collationAlgorithm.collate(graph, witnesses.toArray(new SimpleWitness[0]));
        for (String wl : omittedWitnesses) {
            graph.register(new SimpleWitness(wl, ""));
        }
        */
        // ---- Render the variant graph in the requested format ----
        if ("graphml".equals(format)) {
            response.setContentType("text/xml");
            StringWriter writer = new StringWriter();
            XMLStreamWriter swriter = javax.xml.stream.XMLOutputFactory.newFactory().createXMLStreamWriter(writer);
            SimpleVariantGraphSerializer serializer = new SimpleVariantGraphSerializer(graph);
            serializer.toGraphML(swriter);
            %><%=writer.toString()%><%
        }
        else if ("graph".equals(format)) {
            response.setContentType("text/xml");
            SimpleVariantGraphSerializer serializer = new SimpleVariantGraphSerializer(graph);
            serializer.toSVG(out);
        }
        else if ("dot".equals(format)) {
            response.setContentType("text/plain");
            SimpleVariantGraphSerializer serializer = new SimpleVariantGraphSerializer(graph);
            serializer.toDot(out);
        }
        // "text" is for legacy purposes; remove when sure it is not used anymore
        else if ("apptext".equals(format) || "text".equals(format)) {
            response.setContentType("text/plain");
            SimpleVariantGraphSerializer serializer = new SimpleVariantGraphSerializer(graph);
            out.write("Not Present in " + SimpleVariantGraphSerializer.toWitnessesLabel(lacunaWitnesses, true)+"\n\n");
            out.write("Present in " + SimpleVariantGraphSerializer.toWitnessesLabel(presentWitnesses, true)+"\n\n");
            try {
                serializer.toPlainTextApparatus(out, params.baseText);
            } catch (Exception e) {
                params.getLogger().error("ERROR in apptext display: ");
                e.printStackTrace();
            }
        }
        else if ("apphtml".equals(format)) {
            response.setContentType("text/html");
            SimpleVariantGraphSerializer serializer = new SimpleVariantGraphSerializer(graph);
            out.write("<p class=\"presenceList\"><span class=\"label\">Not Present in</span> " + SimpleVariantGraphSerializer.toWitnessesLabelHTML(lacunaWitnesses, true)+"</p>");
            out.write("<p class=\"presenceList\"><span class=\"label\">Present in</span> " + SimpleVariantGraphSerializer.toWitnessesLabelHTML(presentWitnesses, true)+"</p>");
            out.write("<div class=\"apparatus\">");
            try {
                serializer.toHTMLApparatus(out, params.baseText);
            } catch (Exception e) {
                params.getLogger().error("ERROR in apphtml display: ");
                e.printStackTrace();
            }
            out.write("</div>");
        }
        else if ("atable".equals(format) || "csv".equals(format)) {
            // Alignment table: one row per witness, one column per ranked token.
            boolean csv = "csv".equals(format);
            table = VariantGraphRanking.of(graph).asTable();
            if (!csv) {
                %>
<script type="text/javascript" src="/community/js/jquery/jquery.min.js"></script>
<style type="text/css">
@font-face { font-family: 'AntinoouWeb'; src: url('/community/fonts/antinoou-webfont.eot'); src: url('/community/fonts/antinoou-webfont.eot?#iefix') format('embedded-opentype'), url('/community/fonts/antinoou-webfont.woff') format('woff'), url('/community/fonts/antinoou-webfont.ttf') format('truetype'); font-weight: normal; font-style: normal; }
@font-face { font-family: 'EstreWeb'; src: url('/community/fonts/estre.eot'); src: url('/community/fonts/estre.eot?#iefix') format('embedded-opentype'), url('/community/fonts/estre.woff') format('woff'), url('/community/fonts/estre.ttf') format('truetype'); font-weight: normal; font-style: normal; }
@font-face { font-family: 'BukyvedeWeb'; src: url('/community/fonts/Bukyvede.eot?#iefix') format('embedded-opentype'), url('/community/fonts/Bukyvede.woff') format('woff'), url('/community/fonts/Bukyvede.ttf') format('truetype'), url('/community/fonts/Bukyvede.svg#Bukyvede') format('svg'); font-weight: normal; font-style: normal; }
@font-face { font-family: 'SBL_HebrewWeb'; src: url('/community/fonts/sbl_hbrw-webfont.woff2') format('woff2'), url('/community/fonts/sbl_hbrw-webfont.woff') format('woff'); font-weight: normal; font-style: normal; }
@font-face { font-family: GentiumPlus; src: url(/community/fonts/GentiumPlus-I.woff) format('woff'); font-style: italic; }
@font-face { font-family: GentiumPlus; src: url(/community/fonts/GentiumPlus-R.woff) format('woff'); /* should work for all recent browsers */ font-style: normal; }
.witnessWord, body, td, div, span, p { font-family: <%=specialFonts.containsKey(primaryLang) ? (specialFonts.get(primaryLang) + ","):"" %><%=defaultFontSpec%>, Gentium, Times, GentiumPlus, Arial Unicode MS; }
table { border-collapse: collapse; }
td, th { padding: 5px 4px; border: 1px solid #CCC; /*white-space: nowrap; overflow: hidden;*/ }
</style>
<table class="results">
<tr id="beginSeg"></tr>
<tr id="endSeg"></tr>
                <%
            }
            else {
                response.setContentType("text/csv");
                response.setHeader("Content-Disposition", "attachment; filename=collation"+(params.indexContent != null ? ("-"+params.indexContent) : "")+".csv");
                // UTF-8 BOM so Excel detects the encoding
                out.print("\uFEFF");
            }
            int row = 0;
            for (Witness witness : table.columnKeySet()) {
                ++row;
                String sigil = witness.getSigil().trim();
                // BUGFIX: String.replace returns a new String; the previous
                // code discarded the result, so embedded newlines survived.
                sigil = sigil.replace("\n", "");
                if (!csv) {
                    %> <tr class="witnessRow" id="row<%=row%>"><th><%= sigil %></th> <%
                }
                else {
                    out.print("\""+sigil+"\"");
                }
                int col = 0;
                for (Integer wordNum : table.rowKeySet()) {
                    ++col;
                    if (!csv) {
                        %> <td class="witnessWord col<%=col%>"> <%
                    }
                    else {
                        out.print(",\"");
                    }
                    Set<Token> tokens = table.get(wordNum, witness);
                    if (tokens != null) {
                        for (Token token : tokens) {
                            out.print(((SimpleToken)token).getContent().trim());
                        }
                    }
                    if (!csv) {
                        %> </td> <%
                    }
                    else {
                        out.print("\"");
                    }
                }
                if (!csv) out.print("</tr>");
                else out.print("\n");
            }
            if (!csv) {
                // Interactive footer: radio ranges over columns let the user turn a
                // span of readings into parallel-segmentation regularization rules.
                %>
</table>
<script>
var servicesURL = '/community/vmr/api';
function createRules(rules, i) {
	if (!i) i = 0;
	if (i >= rules.length) { alert('Successfully added ' + i + ' rule(s).'); return; }
	// don't recreate a rule if only one word
	if (rules[i].indexOf(' ') < 0) return createRules(rules, ++i);
	var postData = { groupName : 'Unassigned', verse : '<%=params.indexContent%>', scope : 'Verse', visibility : 'Public', sourceWord : rules[i], targetWord : '(('+rules[i]+'))', contextPre : '', contextPost : '', type : 'Parallel Segmentation', comment : '' };
	var url = servicesURL + "/regularization/put/";
	$.post(url, $.param(postData), function(o) {
		var xml = $.parseXML(o);
		var error = $(xml).find('error');
		if (error && error.length) { alert('error: '+$(error).attr('message')); }
		else {
			// it worked! Do the next one :)
			createRules(rules, ++i);
		}
	});
}
function setSegmentation() {
	var rulesSet = {};
	var b = parseInt($('input[name="pbegin"]:checked').val());
	var e = parseInt($('input[name="pend"]:checked').val());
	$('.witnessRow').each(function() {
		var rule = '';
		for (var i = b; i <= e; ++i) {
			var word = $(this).children('.col'+i).text();
			word = $.trim(word);
			if (word.length > 0) { rule += ' ' + word; }
		}
		rule = $.trim(rule);
		if (rule.length > 0) { rulesSet[rule] = true; }
	});
	var t = '';
	var rules = [];
	for (var r in rulesSet) { rules.push(r); t += (r + '\n'); }
	if (confirm('Readings:\n\n' + t + '\n\nWould you like to make a parallel segmentation rule?')) { createRules(rules); }
}
$(document).ready(function () {
	var t1 = '<th style="text-align:right;">Variant Range: Begin</th>';
	var t2 = '<th style="text-align:right;">End</th>';
	var c = 0;
	$('#row1').children('td').each(function() {
		++c;
		t1 += '<td col="'+c+'"><input name="pbegin" type="radio" value="'+c+'"/></td>';
		t2 += '<td col="'+c+'"><input name="pend" type="radio" value="'+c+'"/></td>';
	});
	t1 += '<td rowspan="2"><button onclick="setSegmentation(); return false;">Show Readings</button></td>';
	$('#beginSeg').html(t1);
	$('#endSeg').html(t2);
});
</script>
                <%
            }
        }
        else if ("tei".equals(format)) {
            response.setContentType("text/xml");
            StringWriter writer = new StringWriter();
            XMLStreamWriter swriter = XMLOutputFactory.newFactory().createXMLStreamWriter(writer);
            SimpleVariantGraphSerializer serializer = new SimpleVariantGraphSerializer(graph);
            serializer.toTEI(swriter);
            %> <%=writer.toString()%> <%
        }
        // This is stuff Bham wants as input to their Apparatus editing tool
        else if ("json".equals(format)) {
            params.getLogger().debug("*** Collate returning JSON ***");
            response.setContentType("application/json");
            table = VariantGraphRanking.of(graph).asTable();
            StringBuffer apparatus = new StringBuffer();
            // Target shape (readings sorted by witness support, most first):
            // [ { start:0, end:0,
            //     readings:[ { witnesses:['w1','w2'],
            //                  text:[ { index:0, t:agaph, reading:['w1','w2'], manuscripts:[20001, 20001] }, {...word2...} ] },
            //                {... reading 2 ...} ] } ]
            apparatus.append("[");
            HashMap<Integer, HashMap<String, Vector<String>>> app = new HashMap<Integer, HashMap<String, Vector<String>>>();
            // Bham convention: segments are numbered by even indexes starting at 2.
            int segment = 2;
            Vector<String> aTextReadings = new Vector<String>();
            for (Integer wordNum : table.rowKeySet()) {
                apparatus.append("{");
                apparatus.append("\"start\":"+segment+",");
                apparatus.append("\"end\":"+segment+",");
                // group witnesses by identical reading text at this rank
                final HashMap<String, Vector<String>> readings = new HashMap<String, Vector<String>>();
                for (Witness witness : table.columnKeySet()) {
                    Set<Token> tokens = table.get(wordNum, witness);
                    String reading = "|lac|";
                    if (tokens != null) {
                        reading = "";
                        for (Token token : tokens) {
                            reading += ((SimpleToken)token).getContent();
                        }
                    }
                    Vector<String> wits = readings.get(reading);
                    if (wits == null) {
                        readings.put(reading, new Vector<String>());
                        wits = readings.get(reading);
                    }
                    wits.add(witness.getSigil());
                }
                // order readings by descending witness count
                final Vector<String> rdngs = new Vector<String>();
                rdngs.addAll(readings.keySet());
                Collections.sort(rdngs, new Comparator<String>() {
                    public int compare(String o1, String o2) { return readings.get(o2).size() - readings.get(o1).size(); }
                });
                apparatus.append("\"readings\":[");
                for (String read : rdngs) {
                    Vector<String> wits = readings.get(read);
                    apparatus.append("{ \"witnesses\":");
                    String ws = "[";
                    boolean first = true;
                    for (String w: wits) {
                        if (!first) ws +=", ";
                        first = false;
                        ws+= ("\""+w+"\"");
                    }
                    ws += "]";
                    apparatus.append(ws+",\"text\":["+(!"|lac|".equals(read)?("{\"index\":"+segment+", \"t\":\""+read+"\",\"reading\":"+ws+", \"manuscript\":"+ws+"}"):"")+"]");
                    apparatus.append("},");
                }
                apparatus.append("]");
                apparatus.append("},");
                // majority reading becomes the overtext for this segment
                String r = rdngs.get(0);
                if ("|lac|".equals(r)) r = "";
                aTextReadings.add(r);
                app.put(segment, readings);
                segment+=2;
            }
            apparatus.append("]");
            // strip the trailing commas left by the append loops
            String appts = apparatus.toString().replaceAll(",]","]");
appts = appts.replaceAll(",}","}"); params.getLogger().debug("*** Collate returning: " + appts); out.print("{"); out.print("\"apparatus\":"+appts+","); out.print("\"overtext\":[{\"tokens\":["); int seg = 2; String overtext = ""; for (String aText : aTextReadings) { overtext += "{\"index\":"+seg+", \"t\":\""+aText+"\", \"reading\":\"aText\",\"manuscript\":\"aText\"},"; seg+=2; } overtext += "]}]}"; overtext = overtext.replaceAll(",]","]"); overtext = overtext.replaceAll(",}","}"); out.print(overtext); } else { usage = true; } } else { if (params.format == null) usage = true; } if (!usage) return; else ((Parameters)params).format = "html"; } else ((Parameters)params).format = "html"; Serializer.reportErrors(request, response, out, params, true); %> <%! static HashMap<String, String> specialFonts = new HashMap<String, String>(); static HashSet<String> rightJustify = new HashSet<String>(); static { specialFonts.put("bo", "Antinoou, AntinoouWeb"); specialFonts.put("sa", "Antinoou, AntinoouWeb"); specialFonts.put("fa", "Antinoou, AntinoouWeb"); specialFonts.put("mae", "Antinoou, AntinoouWeb"); specialFonts.put("ly", "Antinoou, AntinoouWeb"); specialFonts.put("cw", "Antinoou, AntinoouWeb"); specialFonts.put("syc", "Estrangelo Edessa, EstreWeb"); specialFonts.put("chu", "BukyvedeWeb"); rightJustify.add("syc"); rightJustify.add("he"); rightJustify.add("arb"); } public static WitnessReading loadModuleReading(String moduleName, String key, boolean supplied, boolean unclear, boolean punct, boolean accents, Logger logger) { String moduleServiceURL = "http://crosswire.org/study/fetchdata.jsp"; String params = "mod="+java.net.URLEncoder.encode(moduleName) + "&key="+java.net.URLEncoder.encode(key) + "&format=strip"; StringBuffer result = HTTPUtils.postURL(moduleServiceURL, params); String t = result.toString().toLowerCase(); WitnessReading retVal = null; try { Vector<WitnessReading> wits = Convert.getTEITranscriptionText(t, supplied, unclear, punct, accents); retVal = (wits.size() 
> 0) ? wits.get(0) : new WitnessReading("", t); } catch (Exception e) { logger.error("Error looking up module: " + moduleName, e); retVal = new WitnessReading("", t); } retVal.setDocumentName(moduleName); return retVal; } public static class WordWithParallelCombiningHintsTokenizer implements Function<String, Iterable<String>> { @Override public Iterable<String> apply(String input) { final List<String> tokens = new Vector<String>(); final StringTokenizer tokenizer = new StringTokenizer(input.trim(), " ,.-?;:\n", true); boolean inSeg = false; String segToken = ""; while (tokenizer.hasMoreTokens()) { String token = tokenizer.nextToken(); if (inSeg) { if (token.indexOf("))") > -1) { token = segToken + token.replaceFirst(java.util.regex.Pattern.quote("))"), ""); inSeg = false; } else { segToken += token; continue; } } else if (token.indexOf("((") > -1) { segToken = token.replaceFirst(java.util.regex.Pattern.quote("(("), ""); inSeg = true; continue; } token = token.trim(); if (token.length() > 0) { tokens.add(token); } } return tokens; } } %>