<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%> <%@ page trimDirectiveWhitespaces="true" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Apparatus" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Apparatus.Segment" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Apparatus.SegmentReading" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Apparatus.SegmentReading.ReadingWord" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Transcription.WitnessReading" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Transcription.Convert" %> <%@ page import="org.crosswire.utils.Utils" %> <%@ page import="org.crosswire.data.DataObject" %> <%@ page import="org.crosswire.utils.HTTPUtils" %> <%@ page import="org.crosswire.sword.keys.VerseKey" %> <%@ page import="org.crosswire.sword.keys.ListKey" %> <%@ page import="java.util.Map" %> <%@ page import="java.util.HashMap" %> <%@ page import="java.util.TreeMap" %> <%@ page import="java.util.List" %> <%@ page import="java.util.Arrays" %> <%@ page import="java.util.ArrayList" %> <%@ page import="java.util.Vector" %> <%@ page import="java.util.UUID" %> <%@ page import="java.util.Collections" %> <%@ page import="org.apache.log4j.Logger" %> <%@ page import="org.crosswire.xml.XMLBlock" %> <%@ page import="org.crosswire.webtools.annotation.*" %> <%@ page import="org.crosswire.webtools.*" %> <%@ page import="javax.validation.constraints.NotNull" %> <%! 
/* Flush threshold: the page body pushes the bulk buffer once more than this many records are buffered. */ static final int BUFFEREDWORDSMAX = 1000; /** Request parameters for this page; the meaning of each field is stated in its Description annotation. customValidation adds no extra checks. */ @Description(value = "Retrieve variant apparatus data", name = "variant/apparatus/get") public static class MyParameters extends Parameters { @NotNull @Description(value = "which range of apparatus to obtain", example = "Acts.4.5") public String indexContent = null; @Description(value = "limit results to a specific segmentGroupID; -1 = all", defaultValue = "-1", example = "3") public Integer segmentGroupID = null; @Description(value = "augment results with segments from another group", example = "7") public Integer augmentGroupID = null; @Description(value = "include a baseline edition with apparatus", example = "ECM") public String includeBaseline = null; @Description(value = "when computing the word context, include this many baseline words before and after reading or word", defaultValue = "3", example = "5") public Integer wordContextSize = 3; @Description(value = "convert 'a' readings to positive apparatus", defaultValue = "false", example = "true") public Boolean positiveConversion = false; @Description(value = "save search data as test index", defaultValue = "false", example = "true") public Boolean testIndex = false; @Description(value = "add extra detail if desired", defaultValue = "complete", example = "extra") public String detail = "complete"; @Override protected void customValidation() { } } /** Posts mod, key and format=raw to the fetchdata service and wraps the response text in a WitnessReading whose document name is moduleName. The supplied, unclear, punct and accents flags are referenced only by the commented-out conversion code. */ public static WitnessReading loadModuleReading(String moduleName, String key, boolean supplied, boolean unclear, boolean punct, boolean accents, Parameters params) { String moduleServiceURL = "http://crosswire.org/study/fetchdata.jsp"; String postParams = "mod="+java.net.URLEncoder.encode(moduleName) + "&key="+java.net.URLEncoder.encode(key) + "&format=raw"; StringBuffer result = HTTPUtils.postURL(moduleServiceURL, postParams); String t = result.toString(); WitnessReading retVal = null; try { retVal = new WitnessReading("", t); /* Vector wits = Convert.getTEITranscriptionText(t, supplied, unclear, punct, accents); retVal = 
(wits.size() > 0) ? wits.get(0) : new WitnessReading("", t); */ } catch (Exception e) { params.getLogger().error("Error looking up module: " + moduleName, e); retVal = new WitnessReading("", t); } retVal.setDocumentName(moduleName); return retVal; } /** Posts bulkBuffer, using HTTPUtils.POST with PARAMSTYPE_JSON, to the _bulk endpoint of the local index named apparatus + segmentGroupID + indexSuffix and prints the response to out. Any exception is printed and swallowed. */ static void pushData(StringBuffer bulkBuffer, Integer segmentGroupID, String indexSuffix, javax.servlet.jsp.JspWriter out) { try { // out.print(bulkBuffer.toString()); StringBuffer result = HTTPUtils.postURL("http://127.0.0.1:9200/apparatus"+segmentGroupID+indexSuffix+"/_bulk", bulkBuffer.toString(), null, null, null, HTTPUtils.POST, false, null, HTTPUtils.PARAMSTYPE_JSON); out.print("\n\n\nResult: " + result); } catch (Exception e) { System.out.println("error pushing data" + e); e.printStackTrace(); } } /** Sends HTTPUtils.DELETE to the local index named apparatus + segmentGroupID + indexSuffix and prints the response to out. NOTE(review): the catch block prints the same error-pushing-data text as pushData. */ static void deleteData(Integer segmentGroupID, String indexSuffix, javax.servlet.jsp.JspWriter out) { try { // out.print(bulkBuffer.toString()); StringBuffer result = HTTPUtils.postURL("http://127.0.0.1:9200/apparatus"+segmentGroupID+indexSuffix, null, null, null, null, HTTPUtils.DELETE, false, null, HTTPUtils.PARAMSTYPE_JSON); out.print("\n\n\nResult: " + result); } catch (Exception e) { System.out.println("error pushing data" + e); e.printStackTrace(); } } /** Morphology codes that addWordParts treats as indeclinable, mapped to a description. */ static Map indeclinable = new HashMap() {{ put("ADV", "ADVerb or adverb and particle combined"); put("CONJ", "CONJuction or conjuctive particle"); put("COND", "CONDitional particle or conjunction"); put("PRT", "PaRTicle, disjunctive particle"); put("PREP", "PREPosition"); put("INJ", "INterJection"); put("ARAM", "ARAMaic transliterated word"); put("HEB", "HEBrew transliterated word"); put("N-PRI", "Indeclinable PRoper Noun"); put("A-NUI", "Indeclinable NUmeral (Adjective)"); put("N-LI", "Indeclinable Letter (Noun)"); put("N-OI", "Indeclinable Noun of Other type"); }}; /** First morphology segment (one-letter code) mapped to a description; consulted by addWordParts when the code is not in indeclinable. */ static Map primaryTypes = new HashMap() {{ put("N", "Noun"); put("V", "Verb"); put("A", "Adjective"); put("R", "Relative pronoun"); put("C", "reCiprocal pronoun"); put("D", "Demonstrative pronoun"); 
put("T", "definite arTicle"); put("K", "correlative pronoun (K)"); put("I", "Interrogative pronoun"); put("X", "indefinite pronoun (X)"); put("Q", "correlative or interrogative pronoun (Q)"); put("F", "reFlexive pronoun (person 1,2,3 added)"); put("S", "poSsessive adjective (person 1,2,3 and number S,P added"); put("P", "Personal pronoun"); }}; /** Overload that calls addWordParts with an empty key prefix. */ static void addWordParts(Map record, ReadingWord rw) { addWordParts(record, rw, ""); } /** Copies text, textLength, lemma and fields parsed from the dash-separated morph code of rw into record, prepending prefix to every key. NOTE(review): primaryForm, wnVerse and wnBook are only written when rw.getMorph() is non-null. Any exception is printed and swallowed. */ static void addWordParts(Map record, ReadingWord rw, String prefix) { try { record.put(prefix+"text", rw.getText()); record.put(prefix+"textLength", rw.getText().length()); String lemma = rw.getLemma(); if (lemma != null) { lemma = lemma.trim(); record.put(prefix+"lemma", lemma); } String morph = rw.getMorph(); if (morph != null) { String checkMorph = morph.replaceAll("[A-Z0-9?-]*", ""); if (checkMorph.length() > 0) { record.put(prefix+"error", checkMorph); } record.put(prefix+"morph", morph); String segs[] = morph.split("-"); // remove "F" = fault segment, if exists // and set fault = true List segsToBe = new ArrayList(); for (int i = 0; i < segs.length; ++i) { if (i > 0 && "F".equals(segs[i])) { record.put(prefix+"fault", true); } else segsToBe.add(segs[i]); } segs = segsToBe.toArray(new String[]{}); String primaryForm = "Unknown"; String suffix = null; String primaryFormDesc = indeclinable.get(segs[0]); if (primaryFormDesc != null) { primaryForm = morph; record.put(prefix+"indeclinable", true); if (segs.length > 1) suffix = segs[1]; } else if (segs.length > 1 && indeclinable.get(String.join("-", segs[0], segs[1])) != null) { primaryFormDesc = indeclinable.get(String.join("-", segs[0], segs[1])); primaryForm = morph; record.put(prefix+"indeclinable", true); if (segs.length > 2) suffix = segs[2]; } else { if (segs.length > 0) { for (int i = 0; i < segs.length; ++i) { record.put(prefix+"morphSeg"+(i+1), segs[i]); } primaryFormDesc = primaryTypes.get(segs[0]); if (primaryFormDesc != null) { String cng = null; primaryForm = segs[0]; if 
("V".equals(primaryForm)) { String tvm = segs[1]; if (tvm.length() == 3 || tvm.length() == 4) { record.put(prefix+"tvm", tvm); String tense = null; if (tvm.startsWith("2")) { tense = tvm.substring(0,2); } else { tense = tvm.substring(0,1); } tvm = tvm.substring(tense.length()); if (tense != null) record.put(prefix+"tense", tense); String voice = tvm.substring(0,1); if (voice != null) record.put(prefix+"voice", voice); String mood = tvm.substring(1,2); if (mood != null) record.put(prefix+"mood", mood); } if (segs.length > 2) { String pnOrCng = segs[2]; if (pnOrCng.length() > 0 && Character.isDigit(pnOrCng.charAt(0))) { record.put(prefix+"person", pnOrCng.substring(0,1)); if (pnOrCng.length() > 1) { record.put(prefix+"number", pnOrCng.substring(1,2)); } } else cng = pnOrCng; } } else if ("S".equals(primaryForm) && segs.length > 1) { String pncng = segs[1]; if (pncng.length() > 0 && Character.isDigit(pncng.charAt(0))) { record.put(prefix+"personPron", pncng.substring(0,1)); if (pncng.length() > 1) { record.put(prefix+"numberPron", pncng.substring(1,2)); if (pncng.length() > 2) { cng = pncng.substring(2); } } } } else { if (segs.length > 1) { cng = segs[1]; /* if (segs.length > 2) { cng = segs[2]; } */ } } if (cng != null) { if (cng.length() > 1 && Character.isDigit(cng.charAt(0))) { record.put(prefix+"person", cng.substring(0,1)); cng = cng.substring(1); } if (cng.length() > 0) { record.put(prefix+"cng", cng); String morphCase = cng.substring(0,1); record.put(prefix+"case", morphCase); if (cng.length() > 1) { String number = cng.substring(1,2); record.put(prefix+"number", number); if (cng.length() > 2) { String gender = cng.substring(2,3); record.put(prefix+"gender", gender); } } } } } else { primaryForm = "Unknown"; } } } record.put(prefix+"primaryForm", primaryForm); if (primaryFormDesc != null) record.put(prefix+"primaryFormDesc", primaryFormDesc); if (suffix != null) record.put(prefix+"suffix", suffix); record.put(prefix+"wnVerse", rw.getWordNumberVerse()); 
record.put(prefix+"wnBook", rw.getWordNumberBook()); } } catch (Exception e) { System.out.println("ERROR ADDING WORD PARTS. " + e); e.printStackTrace(); } } /** Orders the records that carry both readingWordNumber and baseword.wnVerse by reading word number and, where the baseline word number drops below the previous one, sets transposedBaselineWN on the affected record(s): -1 for a slide forward, -2 for a slide backward, otherwise the partner baseline word number. Exceptions are printed and swallowed. */ void checkTranspositions(List > wordRecords, Logger logger) { try { // // Build an ordered (by reading word order) map of words which have a baseword.wnVerse // Map > wordMap = new TreeMap >(); for (Map wr : wordRecords) { Integer wordNumberReading = (Integer)wr.get("readingWordNumber"); Integer wordNumberBL = (Integer)wr.get("baseword.wnVerse"); logger.info("wordNumberReading: " + wordNumberReading); logger.info("wordNumberBL: " + wordNumberBL); if (wordNumberReading != null && wordNumberBL != null) wordMap.put(wordNumberReading, wr); } logger.info("Checking Transpositions... wordMap.size(): " + wordMap.size()); int lastWordNumberReading = -1; int lastWordNumberBL = -1; // check for simple single word transpositions // BL 2 4 6 8 10 12 14 16 18 20 22 24 26 PC Delta // // i. 2 / (8) \ 6 \ (4) / 10 / 12 / 14 / 16 / 18 / 20 / 22 / 24 / 26 // ii. 2 / (6) \ (4) / 8 / 10 / 12 / 14 / 16 / 18 / 20 / 22 / 24 / 26 // iii. 2 / (24) \ 6 / 8 / 10 / 12 / 14 / 16 / 18 / 20 / 22 \ (4) / 26 // iv. 2 / (6) \ (4) / 8 / 10 / 12 / 14 / (24) \ 18 / 20 / 22 \ (16) / 26 // v. 2 / (8) \ 6 \ (4) / 10 / 12 / 14 / (24) \ 18 / 20 / 22 \ (16) / 26 // vi. 2 / 4 / 6 / (24) \ 8 / 10 / 12 / 14 / 16 / 18 / 20 / 22 / 26 // vii. 2 / (24) \ 6 \ (16) / 10 / 12 / 14 / (8) \ 18 / 20 / 22 \ (4) / 26 // viii. 2 / 6 / 8 \ (4) / 10 // viii. (6) \ 2 / 4 / 8 2 1 // viii. 
(6) \ 4 \ (2) / 8 2 2 // 6 4 2 8 10 // 6 4 2 10 for (Integer wordNumberReading : wordMap.keySet()) { Map word = wordMap.get(wordNumberReading); Integer wordNumberBL = (Integer)word.get("baseword.wnVerse"); // logger.info("wordNumberBL: " +wordNumberBL); // move on if we've been marked as transposed Integer alreadyTransposedWN = (Integer)word.get("transposedBaselineWN"); if (alreadyTransposedWN != null) continue; if (lastWordNumberReading > -1) { if (wordNumberBL < lastWordNumberBL) { logger.info("**************** Found Transposition !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"); // strategy: find the lowest BL word number after me until a BL word number greater than me // and assume that is the transposed word // unless its BL word is +2 from me int transposedBL = wordNumberBL; int transposedWN = wordNumberReading; int highestBaselineWNBeforeLast = -1; int possibleCount = 0; for (Integer wordNumberT : wordMap.keySet()) { Map wordT = wordMap.get(wordNumberT); Integer wordNumberBLT = (Integer)wordT.get("baseword.wnVerse"); logger.info("wordT: " + wordT.get("text") + "; wordNumberBLT: " + wordNumberBLT + "; transposedBL: " + transposedBL + "; highestBaselineWNBeforeLast: " + highestBaselineWNBeforeLast + "; possibleCount: " + possibleCount + "; transposedWN: " + transposedWN + "; lastWordNumberReading: " + lastWordNumberReading); if (wordNumberT < lastWordNumberReading) { if (wordNumberBLT > highestBaselineWNBeforeLast) { highestBaselineWNBeforeLast = wordNumberBLT; } } else if (wordNumberT > wordNumberReading) { ++possibleCount; if (wordNumberBLT > lastWordNumberBL) { break; } if (wordNumberBLT < transposedBL) { transposedBL = wordNumberBLT; transposedWN = wordNumberT; } } } // we have just slid to a later position, we haven't transposed (viii.) if (transposedBL < highestBaselineWNBeforeLast) { word = wordMap.get(transposedWN); word.put("transposedBaselineWN", -1); logger.info("Found Transposition slide forward! 
: " + word.get("text") + " (" + transposedWN + ") ->- "); } else if (possibleCount <= (transposedWN - lastWordNumberReading)) { word = wordMap.get(transposedWN); word.put("transposedBaselineWN", lastWordNumberBL); String firstWord = (String)word.get("text"); word = wordMap.get(lastWordNumberReading); word.put("transposedBaselineWN", transposedBL); logger.info("Found Transposition pair! : " + firstWord + " (" + lastWordNumberBL + ") ->-<- " + word.get("text") + " (" + transposedBL + ")"); } else { word = wordMap.get(lastWordNumberReading); logger.info("Found Transposition slide backward! : " + word.get("text") + " (" + lastWordNumberReading + ") " + "-<-"); word.put("transposedBaselineWN", -2); } logger.info("transposedBL: " + transposedBL + "; highestBaselineWNBeforeLast: " + highestBaselineWNBeforeLast + "; possibleCount: " + possibleCount + "; transposedWN: " + transposedWN + "; lastWordNumberReading: " + lastWordNumberReading); } } lastWordNumberReading = wordNumberReading; lastWordNumberBL = wordNumberBL; } } catch (Exception e) { e.printStackTrace(); } } %> <% /* Page body: loads the segments for indexContent, optionally augments and converts them, then either serializes them when format is json or deletes and rebuilds the per-word index records for the segment group. */ MyParameters params = new MyParameters().loadFromRequest(request, response, false); if (params.getErrors().size() == 0) { int detail = Segment.parseDetail(params.detail, Segment.DETAIL_COMPLETE); List segments = Apparatus.getSegments(params.indexContent, params.segmentGroupID); if (params.augmentGroupID != null) { List augments = Apparatus.getSegments(params.indexContent, params.augmentGroupID); Apparatus.augmentSegments(segments, augments); } if (params.positiveConversion) { // Acts: 327 Apparatus.buildPositiveApparatus(segments, 327); } Collections.sort(segments); if ("json".equals(params.format)) { StringBuffer retVal = new StringBuffer(); retVal.append(""); for (Segment s : segments) { retVal.append(s.toFormattedXML()); } retVal.append(""); Serializer.output(response, out, params, XMLBlock.createXMLBlock(retVal.toString())); return; } List baselineWords = new ArrayList(); StringBuffer bulkBuffer = 
new StringBuffer(); int bufferedWords = 0; response.setContentType("text/plain"); boolean addBaseline = false; String lastVerse = "yoyo"; //params.getLogger().info("segments.size(): " + segments.size()); int wordNumberBookVerseBase = 0; int wordNumberBookLast = wordNumberBookVerseBase; /* Remove the existing target index before it is repopulated below. */ deleteData(params.augmentGroupID != null ? params.augmentGroupID : params.segmentGroupID, params.testIndex ? "_test":"", out); for (Segment s : segments) { List > segmentWordRecords = new ArrayList >(); WitnessReading baselineReading = new WitnessReading("", ""); String verseTextShort = s.getVerseTextShort(); VerseKey vk = new VerseKey(); vk.setIntros(true); vk.setText(verseTextShort); if (params.includeBaseline != null && !lastVerse.equals(verseTextShort)) { baselineReading = loadModuleReading(params.includeBaseline, verseTextShort, true, true, true, true, params); //params.getLogger().info("Loading baseline reading for " + verseTextShort + ": " + baselineReading.toString()); if (baselineReading != null) { baselineWords = baselineReading.getReadingWords(); wordNumberBookVerseBase = wordNumberBookLast; for (ReadingWord rw : baselineWords) { wordNumberBookLast += 2; rw.setWordNumberBook(wordNumberBookLast); } addBaseline = true; } lastVerse = verseTextShort; } /* Parse the start-end word range from the digits and dashes of the context description; start defaults to 0 and end defaults to start when parsing fails. */ String cd = s.getContextDescription().replaceAll("[^0-9\\-]", ""); int start = 0; try { start = Integer.parseInt(cd.split("-")[0]); } catch (Exception e) {} int end = start; try { end = Integer.parseInt(cd.split("-")[1]); } catch (Exception e) {} StringBuffer contextPre = new StringBuffer(); StringBuffer contextPost = new StringBuffer(); StringBuffer contextBase = new StringBuffer(); int readingWordStart = (start + (start % 2)) / 2; int readingWordEnd = end / 2; // we want the rounding down if an odd number so we get every word of the base text int wordNumberBookSegmentBase = wordNumberBookVerseBase + start; List baselineReadingWords = new ArrayList(); int wordNumber = 0; //params.getLogger().info("Baseline Words: " + 
Serializer.toJSON(baselineWords)); for (ReadingWord rw : baselineWords) { ++wordNumber; if (wordNumber < readingWordStart) { if (contextPre.length() > 0) contextPre.append(" "); contextPre.append(rw.getText()); } else if (wordNumber > readingWordEnd) contextPost.append(" " + rw.getText()); else { if (contextBase.length() > 0) contextBase.append(" "); contextBase.append(rw.getText()); baselineReadingWords.add(rw); } } //params.getLogger().info("ContextPre ("+readingWordStart+"): " + contextPre.toString()); //params.getLogger().info("ContextPost ("+readingWordEnd+"): " + contextPost.toString()); List srs = new ArrayList(); if (addBaseline) { addBaseline = false; srs.add(new SegmentReading(0, "baseline", baselineReading.getText())); } srs.addAll(Arrays.asList(s.getSegmentReadings())); //params.getLogger().info("getSegmentReadings().size(): " + s.getSegmentReadings().length); for (SegmentReading sr : srs) { // skip all the stuff we don't care about if (sr.getReadingName() == null) continue; if (sr.getReadingName().trim().startsWith("z")) continue; if (sr.getReadingName().trim().endsWith("zx")) continue; if (sr.getReadingName().trim().equals("a")) continue; if (sr.getReadingName().trim().equals("?")) continue; List baselineRemainingWords = new ArrayList(); /* NOTE(review): add(0, ...) below mutates the list returned by getSubLabels(); confirm that getter returns a copy. */ List subLabels = sr.getSubLabels(); subLabels.add(0, ""); for (String subLabel : subLabels) { String readingText = sr.getReading(subLabel).trim(); readingText = readingText.replaceAll("]*>", ""); readingText = readingText.replaceAll("", ""); //params.getLogger().info("label: " + sr.getReadingName() + ": readingText: " + readingText); if (readingText.startsWith("(")) continue; baselineRemainingWords.addAll(baselineReadingWords); //params.getLogger().info("baselineRemainingWords.size(): " + baselineRemainingWords.size()); wordNumber = 0; List rws = sr.getReadingWords(subLabel); //params.getLogger().info("rws.size(): " + rws.size()); int witnessCount = sr.getWitnesses(subLabel).size(); String witnesses[] = new String[0]; 
try { witnesses = sr.getWitnessesText(false, subLabel, "|").split("\\|"); } catch (Exception e) {} int wordNumberBookLastBaseline = wordNumberBookSegmentBase - 2; int wordNumberVerseLastBaseline = start - 2; List > wordRecords = new ArrayList >(); String readingLabel = sr.getReadingName()+subLabel; String readingID = Integer.toString(s.getVerseHash()); readingID += String.format("%03d", start); readingID += String.format("%03d", end); readingID += readingLabel; //params.getLogger().info("Processing reading: " + cd + "." + readingLabel + "; words: " + rws.size()); for (ReadingWord rw : rws) { String lemma = rw.getLemma(); if (lemma != null) lemma = lemma.trim(); ++wordNumber; if ((rw.getText() != null && rw.getText().trim().equals("om.")) || lemma == null || lemma.length() == 0) continue; String id = readingID + String.format("%03d", wordNumber); Map record = new HashMap(); record.put("id", id); record.put("verseHash", s.getVerseHash()); record.put("book", vk.getBookAbbrev()); record.put("chapter", vk.getBookAbbrev() + "." + vk.getChapter()); record.put("verse", verseTextShort); record.put("bk", vk.getBook()); record.put("ch", vk.getChapter()); record.put("vs", vk.getVerse()); if (!"baseline".equals(readingLabel)) record.put("segment", cd); //params.getLogger().info("word: " + rw.getText()); record.put("readingLabel", readingLabel); boolean updatedWordNumber = false; boolean updatedWordNumberBook = false; if (lemma != null) { lemma = lemma.trim(); // find baseline word. 
This is a simple check for first occurance of the same lemma in the baseline within the segment portion ReadingWord baselineWord = null; if (!"baseline".equals(readingLabel)) { for (ReadingWord bw: baselineRemainingWords) { if (lemma.equals(bw.getLemma())) { baselineWord = bw; break; } } if (baselineWord != null) { baselineRemainingWords.remove(baselineWord); addWordParts(record, baselineWord, "baseword."); record.put("baseword.lemmaMatch", true); if (baselineWord.getWordNumberBook() != 0) { wordNumberBookLastBaseline = baselineWord.getWordNumberBook(); } // always set wordNumberBook rw.setWordNumberBook(wordNumberBookLastBaseline); updatedWordNumberBook = true; if (baselineWord.getWordNumberVerse() != 0) { // only set wordNumberVerse if baseword has a word number wordNumberVerseLastBaseline = baselineWord.getWordNumberVerse(); rw.setWordNumberVerse(wordNumberVerseLastBaseline); updatedWordNumber = true; } } // TODO: why are we not getting consecutive word numbers in Acts 1:12 6-8 if (!updatedWordNumber ) { rw.setWordNumberVerse(wordNumberVerseLastBaseline += 2); } if (!updatedWordNumberBook) { rw.setWordNumberBook(wordNumberBookLastBaseline += 2); } } } addWordParts(record, rw); StringBuffer wordContext = new StringBuffer(contextPre); if (wordContext.length() > 0) wordContext.append(" "); wordContext.append("["); int j = 0; for (ReadingWord rwc : sr.getReadingWords(subLabel)) { ++j; if (j > 1) wordContext.append(" "); if (j == wordNumber) wordContext.append("(" + rwc.getText() + ")"); else wordContext.append(rwc.getText()); } wordContext.append("]" + contextPost); record.put("baselineWordCount", baselineReadingWords.size()); record.put("readingWordCount", rws.size()); record.put("readingWordCountDifference", rws.size() - baselineReadingWords.size()); record.put("readingWordNumber", wordNumber); record.put("witnessCount", witnessCount); record.put("witnesses", witnesses); record.put("context", wordContext.toString()); record.put("contextBase", "[" + 
contextBase.toString()+"]"); wordRecords.add(record); } /* params.getLogger().info("About to start second pass. All wordRecords:"); for (Map record : wordRecords) { params.getLogger().info(Serializer.toJSON(record)); } */ // second pass at finding baseword match for (Map record : wordRecords) { ReadingWord baselineWord = null; if (!"baseline".equals(readingLabel)) { Integer wordNumberVerse = (Integer)record.get("wnVerse"); if (wordNumberVerse != null) { //params.getLogger().info("baselineRemainingWord: " + baselineRemainingWords.size() + " for " + lastVerse); for (ReadingWord bw: baselineRemainingWords) { // match on same word number //params.getLogger().info("Comparing: " + wordNumberVerse + " to " + bw.getWordNumberVerse()); /* NOTE(review): == on an Integer; correct only if getWordNumberVerse() returns a primitive int - confirm. */ if (wordNumberVerse == bw.getWordNumberVerse()) { //params.getLogger().info("Match!: " + wordNumberVerse + " to " + bw.getWordNumberVerse()); baselineWord = bw; break; } } } if (baselineWord != null) { baselineRemainingWords.remove(baselineWord); addWordParts(record, baselineWord, "baseword."); // always set wordNumberBook record.put("wnBook", baselineWord.getWordNumberBook()); record.put("wnVerse", baselineWord.getWordNumberVerse()); record.put("baseword.lemmaMatch", false); } } } try { //params.getLogger().info("Starting 3rd pass."); // third pass at finding baseword match for (Map record : wordRecords) { ReadingWord baselineWord = null; if (!"baseline".equals(readingLabel)) { //params.getLogger().info("readingLabel: " + readingLabel); Integer wordNumberVerse = (Integer)record.get("wnVerse"); //params.getLogger().info("wordNumberVerse: " + wordNumberVerse); String morph = (String)record.get("morph"); //params.getLogger().info("wordNumberVerse: " + wordNumberVerse + "; morph: " + morph); if (wordNumberVerse != null && ("CONJ".equals(morph) || "PREP".equals(morph))) { //params.getLogger().info("baselineRemainingWord: " + baselineRemainingWords.size() + " for " + lastVerse); for (ReadingWord bw: baselineRemainingWords) { String bwMorph = 
bw.getMorph(); //params.getLogger().info("Comparing: " + wordNumberVerse + " to " + bw.getWordNumberVerse()); /* NOTE(review): a null bw.getMorph() throws here; the enclosing try/catch then ends the third pass early. */ if (bwMorph.equals(morph)) { //params.getLogger().info("Match!: " + wordNumberVerse + " to " + bw.getWordNumberVerse()); baselineWord = bw; break; } } } if (baselineWord != null) { baselineRemainingWords.remove(baselineWord); addWordParts(record, baselineWord, "baseword."); // always set wordNumberBook record.put("wnBook", baselineWord.getWordNumberBook()); record.put("wnVerse", baselineWord.getWordNumberVerse()); record.put("baseword.lemmaMatch", false); } } } //params.getLogger().info("Finished 3rd pass."); } catch(Exception e) { e.printStackTrace(); } // add remaining baseline words as om. word int omWordNumber = -1; if (!"baseline".equals(readingLabel)) { //params.getLogger().info("adding omBaseWords: " + baselineRemainingWords.size()); for (ReadingWord bw: baselineRemainingWords) { --omWordNumber; String id = readingID + String.format("%03d", (omWordNumber*-1)+900); Map record = new HashMap(); record.put("id", id); record.put("verseHash", s.getVerseHash()); record.put("book", vk.getBookAbbrev()); record.put("chapter", vk.getBookAbbrev() + "." 
+ vk.getChapter()); record.put("verse", verseTextShort); record.put("bk", vk.getBook()); record.put("ch", vk.getChapter()); record.put("vs", vk.getVerse()); record.put("segment", cd); record.put("readingLabel", readingLabel); record.put("omToBaseline", true); record.put("baselineWordCount", baselineReadingWords.size()); record.put("readingWordCount", rws.size()); record.put("readingWordNumber", omWordNumber); record.put("witnessCount", witnessCount); record.put("witnesses", witnesses); record.put("contextBase", "[" + contextBase.toString()+"]"); addWordParts(record, bw, "baseword."); record.put("wnBook", bw.getWordNumberBook()); record.put("wnVerse", bw.getWordNumberVerse()); //params.getLogger().info("adding omBaseWord: " + Serializer.toJSON(record)); wordRecords.add(record); } } // check for transpositions checkTranspositions(wordRecords, params.getLogger()); segmentWordRecords.addAll(wordRecords); } //params.getLogger().info("finished sublabels for reading"); } //params.getLogger().info("finished readings"); // do some computation before pushing int baselineWordCount = baselineReadingWords.size(); int segmentReadingWordCountMin = baselineWordCount; int segmentReadingWordCountMax = baselineWordCount; for (Map record : segmentWordRecords) { if (!"baseline".equals(record.get("readingLabel"))) { Integer v = (Integer)record.get("readingWordCount"); if (v != null && v < segmentReadingWordCountMin) segmentReadingWordCountMin = v; if (v != null && v > segmentReadingWordCountMax) segmentReadingWordCountMax = v; } } String matchChecks[] = { "tvm", "cng", "tense", "voice", "mood", "case", "number", "gender" }; for (Map record : segmentWordRecords) { record.put("segmentReadingWordCountMin", segmentReadingWordCountMin); record.put("segmentReadingWordCountMax", segmentReadingWordCountMax); record.put("segmentReadingWordCountFluctuation", segmentReadingWordCountMax - segmentReadingWordCountMin); String bVal = null; String val = null; for (String checkKey : matchChecks) { val = 
(String)record.get(checkKey); bVal = (String)record.get("baseword."+checkKey); if (val != null && !val.isEmpty() && bVal != null && !bVal.isEmpty()) { record.put("baseword."+checkKey+"Match", val.equals(bVal)); } } } // push words to buffer for (Map record : segmentWordRecords) { /* Each record is appended as an index action line carrying its id, followed by a line holding the record serialized with Serializer.toJSON. */ bulkBuffer.append("{ \"index\": { \"_id\": \""+record.get("id").toString()+"\" }}\n"); bulkBuffer.append(Serializer.toJSON(record)+"\n"); ++bufferedWords; if (bufferedWords > BUFFEREDWORDSMAX) { params.getLogger().info("pushing data to elastic. records: " + bufferedWords + "; lastVerse: " + lastVerse); pushData(bulkBuffer, params.augmentGroupID != null ? params.augmentGroupID : params.segmentGroupID, params.testIndex ? "_test":"", out); bulkBuffer = new StringBuffer(); bufferedWords = 0; } } params.getLogger().info("pushed " + segmentWordRecords.size() + " segmentWordRecords to bulkBuffer; bufferedWords: " + bufferedWords); } if (bufferedWords > 0) { params.getLogger().info("pushing data to elastic. records: " + bufferedWords + "; lastVerse: " + lastVerse); pushData(bulkBuffer, params.augmentGroupID != null ? params.augmentGroupID : params.segmentGroupID, params.testIndex ? "_test":"", out); } params.getLogger().info("finished pushing to elastic."); return; } else params.format = "html"; Serializer.reportErrors(request, response, out, params, true); %>