<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%> <%@ page trimDirectiveWhitespaces="true" %> <%@ page import="org.crosswire.utils.Sessions" %> <%@ page import="java.util.Vector" %> <%@ page import="java.util.HashSet" %> <%@ page import="eu.interedition.collatex.simple.SimpleWitness" %> <%@ page import="eu.interedition.collatex.Witness" %> <%@ page import="eu.interedition.collatex.Token" %> <%@ page import="eu.interedition.collatex.io.*" %> <%@ page import="eu.interedition.collatex.jung.JungVariantGraph" %> <%@ page import="eu.interedition.collatex.simple.SimpleToken" %> <%@ page import="eu.interedition.collatex.simple.SimpleTokenNormalizers" %> <%@ page import="eu.interedition.collatex.simple.SimpleVariantGraphSerializer" %> <%@ page import="eu.interedition.collatex.simple.SimpleCollation" %> <%@ page import="eu.interedition.collatex.CollationAlgorithm" %> <%@ page import="eu.interedition.collatex.CollationAlgorithmFactory" %> <%@ page import="eu.interedition.collatex.matching.EqualityTokenComparator" %> <%@ page import="eu.interedition.collatex.VariantGraph" %> <%@ page import="eu.interedition.collatex.util.VariantGraphRanking" %> <%@ page import="com.google.common.collect.RowSortedTable" %> <%@ page import="java.io.BufferedReader" %> <%@ page import="java.io.InputStreamReader" %> <%@ page import="java.lang.StringBuilder" %> <%@ page import="java.util.Set" %> <%@ page import="java.util.List" %> <%@ page import="java.util.Iterator" %> <%@ page import="java.util.ArrayList" %> <%@ page import="java.util.Collections" %> <%@ page import="java.util.Arrays" %> <%@ page import="java.util.Comparator" %> <%@ page import="java.util.StringTokenizer" %> <%@ page import="com.google.common.base.Function" %> <%@ page import="javax.xml.stream.XMLOutputFactory" %> <%@ page import="javax.xml.stream.XMLStreamWriter" %> <%@ page import="java.io.StringWriter" %> <%@ page import="java.util.HashMap" %> <%@ page import="java.util.Map" %> <%@ page import="org.crosswire.utils.HTTPUtils" %> <%@ page import="org.crosswire.utils.Utils" %> <%@ page import="org.crosswire.sword.keys.VerseKey" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Transcription" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Transcription.WitnessReading" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Transcription.Convert" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Document" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Regularization" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Regularization.RegularizationRule" %> <%@ page import="org.apache.log4j.Logger" %> <%@ page import="org.json.JSONObject" %> <%@ page import="org.json.JSONArray" %> <%@ page import="org.crosswire.webtools.annotation.*" %> <%@ page import="org.crosswire.webtools.*" %> <%@ page import="javax.validation.constraints.Pattern" %> <%! @Description(value = "Collate text segments", name = "collate") public static class MyParameters extends Parameters { @Description(value = "witness text to include in collation. can repeat or can be numbered", example = "w=text or w1=text") public String w = null; @Description(value = "custom witness label to show for a witness, can repeat corresponding to {w} parameters or can be numbered.", example = "l=P52 or l1=P52") public String l = null; @Pattern(regexp = "^(atable|csv|graphml|dot|graph|tei|apptext|apphtml)?$", message = "Valid response formats: \"atable\", \"graph\", \"csv\", \"graphml\", \"dot\", \"tei\", \"apptext\", or \"apphtml\"") @Description(value = "specify the result format: atable, graph, csv, graphml, dot, tei, apptext, apphtml", defaultValue = "atable", example = "graph") public String format = null; @Pattern(regexp = "^(dekker|medite|needleman-wunsch)?$", message = "Valid response formats: \"dekker\", \"medite\", or \"needleman-wunsch\"") @Description(value = "one of: dekker, medite, needleman-wunsch", defaultValue = "dekker", example = "medite") public String algorithm = null; @Description(value = "collate witnesses which are members of a document group (-1 : all extant)", example = "123") public Integer documentGroupID = null; @Description(value = "which verse to collate (only applies to witnesses which are looked up (e.g., from a documentGroupID)", example = "Jn.2.2") public String indexContent = null; @Description(value = "include additional verses after indexContent parameter", defaultValue = "0", example = "1") public Integer extraVerses = 0; @Description(value = "Base text module name to include in the collation", example = "NA28") public String baseText = null; @Description(value = "Additional module name to include in the collation. Can repeat", example = "TR") public String loadModule = null; @Description(value = "Witness to include in the collation. Can repeat.", example = "10075") public Integer loadDocID = null; @Description(value = "Apply a general regularization to witnesses, ignoring supplied marks", defaultValue = "false", example = "true") public Boolean ignoreSupplied = false; @Description(value = "Apply a general regularization to witnesses, ignoring unclear marks", defaultValue = "false", example = "true") public Boolean ignoreUnclear = false; @Description(value = "Apply a general regularization to witnesses, ignoring punctuation", defaultValue = "false", example = "true") public Boolean ignorePunctuation = false; @Description(value = "Apply a general regularization to witnesses, ignoring Greek accent marks", defaultValue = "false", example = "true") public Boolean ignoreAccents = false; @Description(value = "Restrict witnesses to a given language", example = "grc") public String lang = null; @Description(value = "Apply regularization rules from this user, can be repeated, appended with '+' only localRules, appended with '*' only globalRules", example = "jsmith+") public String regUserID = null; @Description(value = "If set to a username and this username has a transcription for this verse, use the user's transcription over the global transcription", example = "jsmith") public String preferUser = null; // deprecated parameters @Description(value = "internal") public String biblicalContent = null; @Description(value = "internal") public String verse = null; @Override protected Map getRequestParameters() { Map requestParameterMap = super.getRequestParameters(); // clear out our w1..n and l1..n parameters before validation final java.util.regex.Pattern wSeries = java.util.regex.Pattern.compile("^[wl]\\d+$"); for (Iterator> it = requestParameterMap.entrySet().iterator(); it.hasNext(); ) { Map.Entry p = it.next(); if (wSeries.matcher(p.getKey()).matches()) { // getLogger().info("REMOVING PARAMETER: " + p.getKey()); it.remove(); } // else getLogger().info("NOT REMOVING PARAMETER: " + p.getKey()); } return requestParameterMap; } @Override protected void afterLoad() { if (indexContent == null) indexContent = biblicalContent; if (indexContent == null) indexContent = verse; } @Override protected void customValidation() { } } %> <% // check for json and read body must be first thing or our body is read by getParameter calls //dumpCallInfo(request, params.getLogger()); List witnesses = new ArrayList(); JSONObject requestJSON = null; if ("application/json".equals(request.getHeader("accept"))) { StringBuilder buffer = new StringBuilder(); BufferedReader reader = request.getReader(); String line; while ((line = reader.readLine()) != null) { buffer.append(line); } String data = buffer.toString(); try { WordWithParallelCombiningHintsTokenizer tokenizer = new WordWithParallelCombiningHintsTokenizer(); requestJSON = new JSONObject(data); JSONArray witnessesNode = requestJSON.getJSONArray("witnesses"); //params.getLogger().info("witnessesNode="+witnessesNode); witnesses = new ArrayList(witnessesNode.length()); for (int i = 0; i < witnessesNode.length(); ++i) { JSONObject witnessObject = (JSONObject)witnessesNode.get(i); final SimpleWitness witness = new SimpleWitness(witnessObject.getString("id").trim()); if (witnessObject.has("tokens")) { final JSONArray tokensArray = witnessObject.getJSONArray("tokens"); final List tokens = new ArrayList(tokensArray.length()); for (int j = 0; j < tokensArray.length(); ++j) { final JSONObject tokenObject = (JSONObject)tokensArray.get(j); String tokenContent = tokenObject.has("t")?tokenObject.getString("t"):null; String normalizedTokenContent = tokenObject.has("n")?tokenObject.getString("n"):null; if (normalizedTokenContent == null) normalizedTokenContent = SimpleWitness.TOKEN_NORMALIZER.apply(tokenContent); tokens.add(new SimpleToken(witness, tokenContent, normalizedTokenContent) { JSONObject jobj = tokenObject; public String toString() { return jobj.toString(); } }); } witness.setTokens(tokens); } else { final String contentNode = witnessObject.getString("content"); // witness = new SimpleWitness(witnessObject.getString("id").trim(), contentNode, tokenizer, SimpleTokenNormalizers.LC_TRIM_WS)); witness.setTokenContents(tokenizer.apply(contentNode), SimpleTokenNormalizers.LC_TRIM_WS); } witnesses.add(witness); } /* if (witnesses.isEmpty()) { throw new IOException("No witnesses in collation"); } Comparator tokenComparator = null; final JsonValue tokenComparatorNode = collationObject.get("tokenComparator"); if (tokenComparatorNode != null && tokenComparatorNode.getValueType() == JsonValue.ValueType.OBJECT) { final JsonObject tokenComparatorObject = (JsonObject) tokenComparatorNode; try { if ("levenshtein".equals(tokenComparatorObject.getString("type"))) { final int configuredDistance = tokenComparatorObject.getInt("distance", 0); tokenComparator = new EditDistanceTokenComparator(configuredDistance == 0 ? 1 : configuredDistance); } } catch (ClassCastException e) { // ignored } } if (tokenComparator == null) { tokenComparator = new EqualityTokenComparator(); } CollationAlgorithm collationAlgorithm = null; final JsonValue collationAlgorithmNode = collationObject.get("algorithm"); if (collationAlgorithmNode != null && collationAlgorithmNode.getValueType() == JsonValue.ValueType.STRING) { final String collationAlgorithmValue = ((JsonString) collationAlgorithmNode).getString(); if ("needleman-wunsch".equalsIgnoreCase(collationAlgorithmValue)) { collationAlgorithm = CollationAlgorithmFactory.needlemanWunsch(tokenComparator); } else if ("gst".equalsIgnoreCase(collationAlgorithmValue)) { collationAlgorithm = CollationAlgorithmFactory.greedyStringTiling(tokenComparator, 2); } else if ("medite".equalsIgnoreCase(collationAlgorithmValue)) { collationAlgorithm = CollationAlgorithmFactory.medite(tokenComparator, SimpleToken.TOKEN_MATCH_EVALUATOR); } } if (collationAlgorithm == null) { collationAlgorithm = CollationAlgorithmFactory.dekker(tokenComparator); } boolean joined = true; try { joined = collationObject.getBoolean("joined", true); } catch (ClassCastException e) { // ignored } if (collationAlgorithm instanceof InspectableCollationAlgorithm) { boolean mergeTranspositions = true; try { mergeTranspositions = collationObject.getBoolean("transpositions", true); } catch (ClassCastException e) { // ignored } ((InspectableCollationAlgorithm) collationAlgorithm).setMergeTranspositions(mergeTranspositions); } return new SimpleCollation(witnesses, collationAlgorithm, joined); */ } catch (Exception e) { e.printStackTrace(); } } %> <%! public static Vector getWitnessReadingsFromParams(HttpServletRequest request) { Vector retVal = new Vector(); String witnessSegment = null; int i = 1; for (i = 1; ((witnessSegment = request.getParameter("w"+i)) != null); ++i) { try { witnessSegment = Transcription.assureUnicode(witnessSegment); String label = request.getParameter("l"+i); if (label == null) label = "w"+i; label = Transcription.assureUnicode(label); WitnessReading wr = new WitnessReading(label, witnessSegment.trim()); retVal.add(wr); } catch (Exception e) { e.printStackTrace(); } } String w[] = request.getParameterValues("w"); String l[] = request.getParameterValues("l"); if (w != null) { for (int j = 0; j < w.length; ++j) { try { witnessSegment = Transcription.assureUnicode(w[j]); String label = l != null && l.length > j ? l[j] : null; if (label == null) label = "w"+i; label = Transcription.assureUnicode(label); WitnessReading wr = new WitnessReading(label, witnessSegment.trim()); retVal.add(wr); ++i; } catch (Exception e) { e.printStackTrace(); } } } return retVal; } public static void dumpCallInfo(HttpServletRequest request, Logger logger) { logger.info("collate called..."); logger.info("request.getContentType: " + request.getContentType()); logger.info("Headers: "); for (Object o: java.util.Collections.list(request.getHeaderNames())) { logger.info(o + "=" + request.getHeader(o.toString())); } logger.info("Attributes: "); for (Object o: java.util.Collections.list(request.getAttributeNames())) { logger.info(o + "=" + request.getAttribute(o.toString())); } logger.info("Parameters: "); for (Object o: java.util.Collections.list(request.getParameterNames())) { for (String v: request.getParameterValues(o.toString())) { logger.info(o + "=" + v); } } } %> <% MyParameters params = new MyParameters().loadFromRequest(request, response, false); if (params.getErrors().size() == 0) { String format = (requestJSON != null) ? "json" : params.format; String loadModule[] = request.getParameterValues("loadModule"); String loadDocID[] = request.getParameterValues("loadDocID"); String dotPath = Utils.getSysConfig(session).getProperty("GraphvizCmd"); String defaultFontSpec = Utils.getSysConfig(session).getProperty("DefaultFontSpec", "GentiumPlus"); if (dotPath != null) SimpleVariantGraphSerializer.setDotPath(dotPath); boolean usage = false; boolean givenBaseText = false; if (format == null) format = "atable"; RowSortedTable> table = null; VariantGraph graph = null; String primaryLang = params.lang; Vector witnessesFromRequest = getWitnessReadingsFromParams(request); if (requestJSON != null || !witnessesFromRequest.isEmpty() || params.documentGroupID != null || loadDocID != null || loadModule != null) { VerseKey verseKey = null; if (params.indexContent != null) { verseKey = new VerseKey(); verseKey.setIntros(true); verseKey.setText(params.indexContent); if (params.extraVerses > 0) { VerseKey endKey = new VerseKey(); endKey.setIntros(true); endKey.setText(params.indexContent); endKey.increment(params.extraVerses); params.indexContent = verseKey.getShortText() + "-" + endKey.getShortText(); } } VerseKey vk = new VerseKey(); vk.setIntros(true); Vector witnessReadings = new Vector(); // First see if we've been passed any verbatim witness text in w1, w2, w3... params and include those witnessReadings.addAll(witnessesFromRequest); // Next see if we've been passed a Document Group of manuscript IDs to collate and include those if (params.documentGroupID != null && params.indexContent != null) { witnessReadings.addAll(Transcription.getWitnessReadings(params.documentGroupID, params.indexContent, !params.ignoreSupplied, !params.ignoreUnclear, !params.ignorePunctuation, !params.ignoreAccents, params.preferUser, params.lang)); } /* for (WitnessReading wr: witnessReadings) { params.getLogger().info(wr.getLabel() + ": " + wr.getText()); } */ // if we've been given a baseText, then retrieve the reading. If not, then don't retrieve, but set to NA28 as default, in case it is given in our witness list params.getLogger().debug("baseText: " + params.baseText); if (params.baseText != null) { // see if our baseText has already been passed to us for (WitnessReading wr : witnessReadings) { params.getLogger().debug("checking: " + params.baseText + " == " + wr.getDocumentName() + " || " + wr.getLabel()); if (params.baseText.equals(wr.getDocumentName()) || params.baseText.equals(wr.getLabel())) { givenBaseText = true; if (!wr.getDocumentName().equals(wr.getLabel())) { // if we've been provided just a document name which has multiple hands, then set to first hand params.baseText = wr.getLabel(); } break; } } // if we haven't been passed our baseText but we have a verse, let's try to retrieve the baseText reading if (!givenBaseText && params.indexContent != null) { witnessReadings.add(loadModuleReading(params.baseText, params.indexContent, !params.ignoreSupplied, !params.ignoreUnclear, !params.ignorePunctuation, !params.ignoreAccents, params.getLogger())); } } else params.baseText = "NA28"; if (loadModule != null && params.indexContent != null) { for (String m : loadModule) { witnessReadings.add(loadModuleReading(m, params.indexContent, !params.ignoreSupplied, !params.ignoreUnclear, !params.ignorePunctuation, !params.ignoreAccents, params.getLogger())); } } if (loadDocID != null && params.indexContent != null) { for (String m : loadDocID) { try { Document d = Document.getDocument(Integer.parseInt(m)); //params.getLogger().info("loading document: " + d.getGANumber()); witnessReadings.addAll(Transcription.getWitnessReadings(new Document[] { d }, params.indexContent, !params.ignoreSupplied, !params.ignoreUnclear, !params.ignorePunctuation, !params.ignoreAccents, params.preferUser)); } catch (Exception e) { e.printStackTrace(); } } //params.getLogger().info("witnessReadings: " + witnessReadings); } String regUserIDs[] = request.getParameterValues("regUserID"); if (regUserIDs != null && regUserIDs.length > 0) { Vector regRules = new Vector(); for (String regUser : regUserIDs) { boolean global = true; boolean local = true; if (regUser.endsWith("+")) { global = false; regUser = regUser.substring(0, regUser.length()-1); } else if (regUser.endsWith("*")) { local = false; regUser = regUser.substring(0, regUser.length()-1); } params.getLogger().debug("Reg Rules: " + regUser + "; global: " + global + "; local: " + local); if (global) regRules.addAll(Arrays.asList(Regularization.getRulesGlobal(regUser, false))); if (local) regRules.addAll(Arrays.asList(Regularization.getRulesByVerse(regUser, false, params.indexContent))); } params.getLogger().debug("Rules count: " + regRules.size()); for (WitnessReading wr : witnessReadings) { wr.setText(Regularization.regularize(verseKey.getHashNumber(), regRules, wr.getText())); } } Vector lacunaWitnesses = new Vector(); Vector omittedWitnesses = new Vector(); Vector presentWitnesses = new Vector(); WordWithParallelCombiningHintsTokenizer tokenizer = new WordWithParallelCombiningHintsTokenizer(); boolean stripOmitted = false; boolean stripLacuna = false; // "text" is for legacy purposes; remove when sure it is not used if ("apphtml".equals(format) || "text".equals(format) || "apptext".equals(format) || "atable".equals(format) || "csv".equals(format)) { stripOmitted = true; stripLacuna = true; } if ("graph".equals(format)) { /* stripOmitted = true; stripLacuna = true; */ } for (WitnessReading wr : witnessReadings) { if (primaryLang == null && wr.getDocumentID() > 0) { Document d = Document.getDocument(wr.getDocumentID()); primaryLang = d.getLanguage(); } if ("(([ Omitted ]))".equals(wr.getText())) { omittedWitnesses.add(wr.getLabel()); if (stripOmitted) continue; } else if ("[lac]".equals(wr.getText())) { lacunaWitnesses.add(wr.getLabel()); if (stripLacuna) continue; } else if (givenBaseText || !wr.getLabel().equals(params.baseText)) presentWitnesses.add(wr.getLabel()); witnesses.add(new SimpleWitness(wr.getLabel(), wr.getText(), tokenizer, SimpleTokenNormalizers.LC_TRIM_WS)); } String algorithm = (requestJSON != null) ? requestJSON.getString("algorithm") : params.algorithm; CollationAlgorithm collationAlgorithm = null; if ("medite".equals(algorithm)) { collationAlgorithm = CollationAlgorithmFactory.medite(new EqualityTokenComparator(), SimpleToken.TOKEN_MATCH_EVALUATOR); } else if ("needleman-wunsch".equals(algorithm)) { collationAlgorithm = CollationAlgorithmFactory.needlemanWunsch(new EqualityTokenComparator()); } else { collationAlgorithm = CollationAlgorithmFactory.dekker(new EqualityTokenComparator()); } boolean joined = false; graph = new SimpleCollation(witnesses, collationAlgorithm, joined).collate(new JungVariantGraph()); /* // from old method, already done in the SimpleCollation class above //collationAlgorithm.collate(graph, witnesses.toArray(new SimpleWitness[0])); for (String wl : omittedWitnesses) { graph.register(new SimpleWitness(wl, "")); } */ if ("graphml".equals(format)) { response.setContentType("text/xml"); StringWriter writer = new StringWriter(); XMLStreamWriter swriter = javax.xml.stream.XMLOutputFactory.newFactory().createXMLStreamWriter(writer); SimpleVariantGraphSerializer serializer = new SimpleVariantGraphSerializer(graph); serializer.toGraphML(swriter); %><%=writer.toString()%><% } else if ("graph".equals(format)) { response.setContentType("text/xml"); SimpleVariantGraphSerializer serializer = new SimpleVariantGraphSerializer(graph); serializer.toSVG(out); } else if ("dot".equals(format)) { response.setContentType("text/plain"); SimpleVariantGraphSerializer serializer = new SimpleVariantGraphSerializer(graph); serializer.toDot(out); } // "text" is for legacy purposes; remove when sure it is not used anymore else if ("apptext".equals(format) || "text".equals(format)) { response.setContentType("text/plain"); SimpleVariantGraphSerializer serializer = new SimpleVariantGraphSerializer(graph); out.write("Not Present in " + SimpleVariantGraphSerializer.toWitnessesLabel(lacunaWitnesses, true)+"\n\n"); out.write("Present in " + SimpleVariantGraphSerializer.toWitnessesLabel(presentWitnesses, true)+"\n\n"); try { serializer.toPlainTextApparatus(out, params.baseText); } catch (Exception e) { params.getLogger().error("ERROR in apptext display: "); e.printStackTrace(); } } else if ("apphtml".equals(format)) { response.setContentType("text/html"); SimpleVariantGraphSerializer serializer = new SimpleVariantGraphSerializer(graph); out.write("

Not Present in " + SimpleVariantGraphSerializer.toWitnessesLabelHTML(lacunaWitnesses, true)+"

"); out.write("

Present in " + SimpleVariantGraphSerializer.toWitnessesLabelHTML(presentWitnesses, true)+"

"); out.write("
"); try { serializer.toHTMLApparatus(out, params.baseText); } catch (Exception e) { params.getLogger().error("ERROR in apphtml display: "); e.printStackTrace(); } out.write("
"); } else if ("atable".equals(format) || "csv".equals(format)) { boolean csv = "csv".equals(format); table = VariantGraphRanking.of(graph).asTable(); if (!csv) { %> <% } else { response.setContentType("text/csv"); response.setHeader("Content-Disposition", "attachment; filename=collation"+(params.indexContent != null ? ("-"+params.indexContent) : "")+".csv"); out.print("\uFEFF"); } int row = 0; for (Witness witness : table.columnKeySet()) { ++row; String sigil = witness.getSigil().trim(); sigil.replace("\n", ""); if (!csv) { %> <% } else { out.print("\""+sigil+"\""); } int col = 0; for (Integer wordNum : table.rowKeySet()) { ++col; if (!csv) { %> <% } else { out.print("\""); } } if (!csv) out.print(""); else out.print("\n"); } if (!csv) { %>
<%= sigil %> <% } else { out.print(",\""); } Set tokens = table.get(wordNum, witness); if (tokens != null) { for (Token token : tokens) { out.print(((SimpleToken)token).getContent().trim()); } } if (!csv) { %>
<% } } else if ("tei".equals(format)) { response.setContentType("text/xml"); StringWriter writer = new StringWriter(); XMLStreamWriter swriter = XMLOutputFactory.newFactory().createXMLStreamWriter(writer); SimpleVariantGraphSerializer serializer = new SimpleVariantGraphSerializer(graph); serializer.toTEI(swriter); %> <%=writer.toString()%> <% } // This is stuff Bham wants as input to their Apparatus editing tool else if ("json".equals(format)) { params.getLogger().debug("*** Collate returning JSON ***"); response.setContentType("application/json"); table = VariantGraphRanking.of(graph).asTable(); StringBuffer apparatus = new StringBuffer(); // "[ // { // start:0, // end:0, // readings:[ // { // witnesses:['w1', 'w2'], // text:[ // { // index:0, // t:agaph, // reading:['w1', 'w2'], // manuscipts:[20001, 20001] // },{...word2...} // ] // }, // {... reading 2 ... } // ] // } // ]" // apparatus.append("["); HashMap>> app = new HashMap>>(); int segment = 2; Vector aTextReadings = new Vector(); for (Integer wordNum : table.rowKeySet()) { apparatus.append("{"); apparatus.append("\"start\":"+segment+","); apparatus.append("\"end\":"+segment+","); final HashMap> readings = new HashMap>(); for (Witness witness : table.columnKeySet()) { Set tokens = table.get(wordNum, witness); String reading = "|lac|"; if (tokens != null) { reading = ""; for (Token token : tokens) { reading += ((SimpleToken)token).getContent(); } } Vector wits = readings.get(reading); if (wits == null) { readings.put(reading, new Vector()); wits = readings.get(reading); } wits.add(witness.getSigil()); } final Vector rdngs = new Vector(); rdngs.addAll(readings.keySet()); Collections.sort(rdngs, new Comparator() { public int compare(String o1, String o2) { return readings.get(o2).size() - readings.get(o1).size(); } }); apparatus.append("\"readings\":["); for (String read : rdngs) { Vector wits = readings.get(read); apparatus.append("{ \"witnesses\":"); String ws = "["; boolean first = true; for (String w: wits) { if (!first) ws +=", "; first = false; ws+= ("\""+w+"\""); } ws += "]"; apparatus.append(ws+",\"text\":["+(!"|lac|".equals(read)?("{\"index\":"+segment+", \"t\":\""+read+"\",\"reading\":"+ws+", \"manuscript\":"+ws+"}"):"")+"]"); apparatus.append("},"); } apparatus.append("]"); apparatus.append("},"); String r = rdngs.get(0); if ("|lac|".equals(r)) r = ""; aTextReadings.add(r); app.put(segment, readings); segment+=2; } apparatus.append("]"); String appts = apparatus.toString().replaceAll(",]","]"); appts = appts.replaceAll(",}","}"); params.getLogger().debug("*** Collate returning: " + appts); out.print("{"); out.print("\"apparatus\":"+appts+","); out.print("\"overtext\":[{\"tokens\":["); int seg = 2; String overtext = ""; for (String aText : aTextReadings) { overtext += "{\"index\":"+seg+", \"t\":\""+aText+"\", \"reading\":\"aText\",\"manuscript\":\"aText\"},"; seg+=2; } overtext += "]}]}"; overtext = overtext.replaceAll(",]","]"); overtext = overtext.replaceAll(",}","}"); out.print(overtext); } else { usage = true; } } else { if (params.format == null) usage = true; } if (!usage) return; else ((Parameters)params).format = "html"; } else ((Parameters)params).format = "html"; Serializer.reportErrors(request, response, out, params, true); %> <%! static HashMap specialFonts = new HashMap(); static HashSet rightJustify = new HashSet(); static { specialFonts.put("bo", "Antinoou, AntinoouWeb"); specialFonts.put("sa", "Antinoou, AntinoouWeb"); specialFonts.put("fa", "Antinoou, AntinoouWeb"); specialFonts.put("mae", "Antinoou, AntinoouWeb"); specialFonts.put("ly", "Antinoou, AntinoouWeb"); specialFonts.put("cw", "Antinoou, AntinoouWeb"); specialFonts.put("syc", "Estrangelo Edessa, EstreWeb"); specialFonts.put("chu", "BukyvedeWeb"); rightJustify.add("syc"); rightJustify.add("he"); rightJustify.add("arb"); } public static WitnessReading loadModuleReading(String moduleName, String key, boolean supplied, boolean unclear, boolean punct, boolean accents, Logger logger) { String moduleServiceURL = "http://crosswire.org/study/fetchdata.jsp"; String params = "mod="+java.net.URLEncoder.encode(moduleName) + "&key="+java.net.URLEncoder.encode(key) + "&format=strip"; StringBuffer result = HTTPUtils.postURL(moduleServiceURL, params); String t = result.toString().toLowerCase(); WitnessReading retVal = null; try { Vector wits = Convert.getTEITranscriptionText(t, supplied, unclear, punct, accents); retVal = (wits.size() > 0) ? wits.get(0) : new WitnessReading("", t); } catch (Exception e) { logger.error("Error looking up module: " + moduleName, e); retVal = new WitnessReading("", t); } retVal.setDocumentName(moduleName); return retVal; } public static class WordWithParallelCombiningHintsTokenizer implements Function> { @Override public Iterable apply(String input) { final List tokens = new Vector(); final StringTokenizer tokenizer = new StringTokenizer(input.trim(), " ,.-?;:\n", true); boolean inSeg = false; String segToken = ""; while (tokenizer.hasMoreTokens()) { String token = tokenizer.nextToken(); if (inSeg) { if (token.indexOf("))") > -1) { token = segToken + token.replaceFirst(java.util.regex.Pattern.quote("))"), ""); inSeg = false; } else { segToken += token; continue; } } else if (token.indexOf("((") > -1) { segToken = token.replaceFirst(java.util.regex.Pattern.quote("(("), ""); inSeg = true; continue; } token = token.trim(); if (token.length() > 0) { tokens.add(token); } } return tokens; } } %>