<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%> <%@ page trimDirectiveWhitespaces="true" %> <%@ page import="org.crosswire.utils.Sessions" %> <%@ page import="org.crosswire.utils.Utils" %> <%@ page import="org.crosswire.xml.XMLBlock" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Document" %> <%@ page import="org.crosswire.community.projects.ntmss.data.ShelfInstance" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Page" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Feature" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Document.SearchFilter" %> <%@ page import="org.crosswire.community.projects.ntmss.data.ProjectManagement" %> <%@ page import="org.crosswire.community.projects.ntmss.data.ProjectManagement.Project" %> <%@ page import="org.crosswire.utils.HTTPUtils" %> <%@ page import="java.util.HashMap" %> <%@ page import="java.util.HashSet" %> <%@ page import="java.util.StringTokenizer" %> <%@ page import="java.util.Date" %> <%@ page import="java.util.Set" %> <%@ page import="java.util.List" %> <%@ page import="java.util.Vector" %> <%@ page import="java.util.ArrayList" %> <%@ page import="java.util.UUID" %> <%@ page import="java.util.PriorityQueue" %> <%@ page import="java.util.Arrays" %> <%@ page import="java.text.SimpleDateFormat" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Transcription.Convert" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Transcription" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Transcription.WitnessReading" %> <%@ page import="org.crosswire.community.projects.ntmss.data.DocumentGroup" %> <%@ page import="org.crosswire.sword.keys.SWKey" %> <%@ page import="org.crosswire.sword.keys.VerseKey" %> <%@ page import="org.crosswire.sword.keys.ListKey" %> <%@ page import="org.crosswire.sword.modules.filters.SWFilter" %> <%@ page import="org.crosswire.sword.modules.filters.GreekAccents" %> <%@ page import="org.crosswire.repo.VersionedRepo" %> <%@ page import="org.json.JSONObject" %> <%@ page import="org.json.JSONArray" %> <%@ page import="org.apache.log4j.Logger" %> <%@ page import="org.crosswire.webtools.annotation.*" %> <%@ page import="org.crosswire.webtools.*" %> <%@ page import="javax.validation.constraints.NotNull" %> <%@ page import="javax.validation.constraints.Pattern" %> <%! @Description(value = "Retrieve transcription of a manuscript.", name = "transcript/get") public static class MyParameters extends Parameters { @Description(value = "manuscript id for which to retrieve a transcript. Can include multiple docIDs separated by a pipe, e.g., ", example = "10006 or 20001|20003 for both") @Pattern(regexp = "^\\d+([|,]\\d+)?$", message = "Please enter a docID or docID list separated by | or ,") public String docID = null; @Description(value = "manuscript ga number for which to retrieve a transcript.", example = "P46") public String gaNum = null; @Description(value = "manuscript page id for which to retrieve a transcript. 
Can include multiple pageIDs separated by a pipe or comma, ALL for all, or a range", example = "10,20|40,1-99999") @Pattern(regexp = "^(ALL|(\\d+|\\d+-\\d+)([|,](\\d+|\\d+-\\d+))*)$", message = "Please enter a pageID, pageID range separated by a -, or pageID list separated by | or ,") public String pageID = null; @Description(value = "indexContent (osisRef) for which to retrieve a transcript", example = "Matt.1.1, Matt.1, Matt, Matt-John") public String indexContent = null; @Description(value = "internal") public String biblicalContent = null; @Description(value = "internal") public String verse = null; @Pattern(regexp = "^((tei|teiraw|html|html_nohyphen|htmlfragment|plaintext|transtext|wce|chapter|download).*)?$", message = "See usage for valid response formats.") @Description(value = "specify the result format: tei, teiraw, htmlfragment, html[_nohyphen], plaintext, transtext[_nounclear][_nosupplied][_nopunct][_noaccents], wce, wce_new, chapterimp, download[=filename]", defaultValue = "tei", example = "teiraw") public String format = "tei"; @Description(value = "retrieve a transcription specifically owned by this user or project", defaultValue = "PUBLISHED", example = "joe") public String userName; @Description(value = "different from userName in that if preferUser doesn't have a transcription, then the PUBLISHED transcription will still be returned", example = "ernie") public String preferUser; @Description(value = "retrieve a previous version of the transcription", example = "cc8c174d08d844b5d757490f000acea29ef484d7") public String versionHash; @Description(value = "if set to true, retrieve the version history for the transcription instead of the actual transcription", defaultValue = "false", example = "true") public Boolean history = false; @Description(value = "used with 'history'. 
if set to true, also retrieve the latest versions for all users", defaultValue = "false", example = "true") public Boolean allUsers = false; @Description(value = "document group whose member witnesses should be retrieved (-1 : all extant)", example = "12") public Integer documentGroupID; @Description(value = "base text module name to include in the transcription list", example = "NA28") public String baseText; @Description(value = "if retrieving by indexContent, whether or not to return full pages", defaultValue = "false", example = "true") public Boolean fullPage = false; @Description(value = "if retrieving TEI and part=\"I\" and part=\"F\" exist unnecessarily, join them", defaultValue = "false", example = "true") public Boolean joinParts = false; @Description(value = "if retrieving TEI, don't include full details in the header.", defaultValue = "false", example = "true") public Boolean briefTEIHeader = false; @Description(value = "include verse markers where verses are present in the versification, even if no transcription text exists", defaultValue = "false", example = "true") public Boolean includeEmpty = false; @Description(value = "whether or not to filter out diacritics, accents, etc.", defaultValue = "false", example = "true") public Boolean filterNoise = false; @Description(value = "only return copyright information", defaultValue = "false", example = "true") public Boolean getCopyright = false; @Override protected void customValidation() { if (this.format != null) super.format = this.format; if (indexContent == null) indexContent = biblicalContent; if (indexContent == null) indexContent = verse; if (docID == null && !getCopyright) addError(-4, "docID cannot be null"); } @Override protected void afterLoad() { docID = HTTPUtils.sanitize(docID); indexContent = HTTPUtils.sanitize(indexContent); gaNum = HTTPUtils.sanitize(gaNum); } @Override protected String[] getExamples() { return new String[] { "For Galatians from P46: ?docID=10046&indexContent=Gal&fullPage=true&format=html" }; } } static HashMap<String, String> specialFonts = new HashMap<String, String>(); static HashSet<String> rightJustify = new HashSet<String>(); static { specialFonts.put("bo", "Antinoou, AntinoouWeb"); specialFonts.put("sa", "Antinoou, AntinoouWeb"); specialFonts.put("fa", "Antinoou, AntinoouWeb"); specialFonts.put("mae", "Antinoou, AntinoouWeb"); specialFonts.put("ly", "Antinoou, AntinoouWeb"); specialFonts.put("cw", "Antinoou, AntinoouWeb"); specialFonts.put("ar", "Arabic, ArabicWeb"); specialFonts.put("ara", "Arabic, ArabicWeb"); specialFonts.put("arb", "Arabic, ArabicWeb"); specialFonts.put("syc", "Estrangelo Edessa, EstreWeb"); specialFonts.put("chu", "BukyvedeWeb"); rightJustify.add("syc"); rightJustify.add("he"); rightJustify.add("hbo"); rightJustify.add("arb"); rightJustify.add("ar"); rightJustify.add("ara"); } public static WitnessReading loadModuleReading(String moduleName, String key) { String moduleServiceURL = "http://crosswire.org/study/fetchdata.jsp"; String params = "mod="+moduleName + "&key="+java.net.URLEncoder.encode(key) + "&format=strip"; StringBuffer result = HTTPUtils.postURL(moduleServiceURL, params); WitnessReading retVal = new WitnessReading("", result.toString()); retVal.setDocumentName(moduleName); return retVal; } public static void dumpCallInfo(HttpServletRequest request, Logger logger) { logger.info("transcript/get called..."); logger.info("request.getContentType: " + request.getContentType()); logger.info("Headers: "); for (Object o: java.util.Collections.list(request.getHeaderNames())) { logger.info(o + "=" + 
request.getHeader(o.toString())); } logger.info("Attributes: "); for (Object o: java.util.Collections.list(request.getAttributeNames())) { logger.info(o + "=" + request.getAttribute(o.toString())); } logger.info("Parameters: "); for (Object o: java.util.Collections.list(request.getParameterNames())) { for (String v: request.getParameterValues(o.toString())) { logger.info(o + "=" + v); } } } %> <% MyParameters params = new MyParameters().loadFromRequest(request, response, false); //dumpCallInfo(request, params.getLogger()); // who we are String userName = Sessions.getSessionStringValue(request, "userName"); String groupNames = Sessions.getSessionStringValue(request, "userGroups"); // who we want to pull transcription for String targetUser=Transcription.assureUnicode(request.getParameter("userName")); String preferUser=Transcription.assureUnicode(request.getParameter("preferUser")); String uuid = "tid-" + UUID.randomUUID().toString(); SimpleDateFormat df = new SimpleDateFormat("yyyy"); String docID=params.docID; String gaNum=params.gaNum; if (docID == null) docID=request.getParameter("docid"); String pageID=request.getParameter("pageID"); if (pageID == null) pageID=request.getParameter("pageid"); boolean fullPage="true".equals(request.getParameter("fullPage")); boolean joinParts="true".equals(request.getParameter("joinParts")); String formats[]=request.getParameterValues("format"); String format=(formats != null && formats.length > 0)?formats[formats.length-1]:null; boolean supplied = (format != null && format.indexOf("nosupplied") < 0); boolean unclear = (format != null && format.indexOf("nounclear") < 0); boolean accents = (format != null && format.indexOf("noaccents") < 0); boolean punct = (format != null && format.indexOf("nopunct") < 0); boolean hyphen = (format != null && format.indexOf("nohyphen") < 0); boolean includeEndorsed="true".equals(request.getParameter("includeEndorsed")); String bookName = null; String baseText = request.getParameter("baseText"); int testament = 2; String v11n = null; String transcriptionAttributionDefault = Utils.getSysConfig(session).getProperty("TranscriptionAttributionDefault", ""); String transcriptionAttribution = Utils.getSysConfig(session).getProperty("TranscriptionAttributionDefault", ""); String transcriptionUsage = Utils.getSysConfig(session).getProperty("TranscriptionUsageSitewide", ""); String defaultFontSpec = Utils.getSysConfig(session).getProperty("DefaultFontSpec", "GentiumPlus"); if (!params.history && "PUBLISHED".equals(targetUser)) targetUser = null; String langOverride = null; if (targetUser != null) { Project p = ProjectManagement.getProject(targetUser); if (p != null) { String attribution = p.getTranscriptionAttribution(); if (attribution != null && attribution.length() > 0) transcriptionAttribution = attribution; String projConfig = p.getConfiguration(); if (projConfig != null && projConfig.length() > 0) { try { JSONObject json = new JSONObject(projConfig); langOverride = json.getString("lang"); params.getLogger().info("langOverride: " + langOverride); } catch (Exception e) { e.printStackTrace(); } if (langOverride != null) { langOverride = langOverride.trim(); if (langOverride.length() == 0) langOverride = null; } } } } transcriptionAttribution = transcriptionAttribution.replace("${yyyy}", df.format(new Date())); transcriptionAttributionDefault = transcriptionAttributionDefault.replace("${yyyy}", df.format(new Date())); transcriptionUsage = transcriptionUsage.replace("${yyyy}", df.format(new Date())); if (transcriptionAttribution != 
null && transcriptionAttribution.endsWith("|")) { transcriptionUsage = ""; transcriptionAttribution = transcriptionAttribution.split("\\|")[0]; } String lang = ""; Document lastDoc = null; SWFilter removeAccents = new GreekAccents(); if ((docID != null || gaNum != null || params.documentGroupID != null) && (pageID != null || params.indexContent != null) || params.getCopyright) { StringBuffer retVal = new StringBuffer(); List<Document> documents = new ArrayList<Document>(); params.getLogger().info("transcript/get(docID: " + docID + "; pageID: " + pageID + "; indexContent: " + params.indexContent + "; preferUser: " + preferUser + "; userName: " + targetUser + "; userAgent: " + request.getHeader( "User-Agent" )); if (docID != null) { StringTokenizer docIDTokens = new StringTokenizer(docID, "|,"); while (docIDTokens.hasMoreTokens()) { String docIDString = docIDTokens.nextToken(); if (docIDString.startsWith("crosswire:")) { docIDString = docIDString.substring("crosswire:".length()); documents.add(new Document.CrossWireDocument(docIDString)); } else { try { int did = Integer.parseInt(docIDString); Document d = Document.getDocument(did); if (d != null) documents.add(d); } catch (Exception e) { e.printStackTrace(); } } } } else if (gaNum != null) { if (gaNum.indexOf("+") > -1) { gaNum = gaNum.split("\\+")[0]; } Document d = Document.getDocumentByGANumber(gaNum); if (d != null) documents.add(d); else { response.setContentType("text/xml"); %> <% return; } } List<Integer> pageIDs = new ArrayList<Integer>(); if (pageID != null && !"ALL".equals(pageID) && pageID.indexOf("-") == -1) { StringTokenizer pageIDTokens = new StringTokenizer(pageID, "|,"); while (pageIDTokens.hasMoreTokens()) { int pid = Integer.parseInt(pageIDTokens.nextToken()); pageIDs.add(pid); } } // lookup pages for a single doc based on biblicalContent; kind of a kludge, but we need it if ((pageID != null && pageID.indexOf("-") > 0) || "ALL".equals(pageID) || (fullPage && documents.size() == 1 && params.indexContent != null)) { Document.SearchFilter searchFilter = new Document.SearchFilter(); searchFilter.currentUserID = userName; if (preferUser != null) { if (groupNames == null || groupNames.length() == 0) groupNames = ""; XMLBlock groupsBlock = new XMLBlock(groupNames); params.getLogger().info("groupsBlock: " + groupsBlock); if (groupsBlock != null) { groupsBlock.addBlock(new XMLBlock("")); } groupNames = groupsBlock.toString(); } params.getLogger().info("groupNames: " + groupNames); searchFilter.currentGroupIDs = groupNames; if (params.indexContent != null) searchFilter.biblicalContent = params.indexContent; searchFilter.docID = documents.get(0).getDocumentID(); String docV11n = documents.get(0).getV11n(); if (docV11n != null && docV11n.length() > 0) searchFilter.v11n = docV11n; if (searchFilter.biblicalContent != null && searchFilter.biblicalContent.length() < 1) searchFilter.biblicalContent = null; Document docs[] = Document.searchDocuments(true, searchFilter); params.getLogger().info("searchFilter: " + searchFilter); params.getLogger().info("docs.length: " + docs.length); int minPage = -1; int maxPage = -1; if (pageID != null && pageID.indexOf("-") > 0) { try { minPage = Integer.parseInt(pageID.substring(0, pageID.indexOf("-"))); } catch (Exception e) {} try { maxPage = Integer.parseInt(pageID.substring(pageID.indexOf("-")+1)); } catch (Exception e) {} } if (docs != null && docs.length == 1) { if (v11n == null || v11n.length() < 1) v11n = docs[0].getV11n(); for (Page p : docs[0].getPages()) { if (minPage > -1 && p.getPageID() < minPage) continue; if (maxPage > -1 && 
p.getPageID() > maxPage) continue; params.getLogger().debug("Adding page: " + p.getPageID()); pageIDs.add(p.getPageID()); } } } if (params.documentGroupID != null) { if (params.documentGroupID > -1) { DocumentGroup dg = DocumentGroup.getDocumentGroup(params.documentGroupID); if (dg == null) { response.setContentType("text/xml"); %> <% return; } for (Document d : dg.getDocuments()) { documents.add(d); } } else { // -1 is pseudo-document group for all extant witnesses for (WitnessReading wr : Transcription.getWitnessReadings(-1, params.indexContent, false, false, false, null)) { Document d = Document.getDocument(wr.getDocumentID()); if (d != null) documents.add(d); } } } Page lastPage = null; for (Document doc : documents) { lastPage = null; if (doc != null) { v11n = doc.getV11n(); params.getLogger().debug("setting v11n: " + v11n); lastDoc = doc; lang = doc.getLanguage(); if (lang != null && lang.indexOf("g") > -1) lang = "grc"; if (langOverride != null) lang = langOverride; String t = ""; if (pageID != null || fullPage) { if (!params.history) { for (int pid : pageIDs) { String tp = null; if (preferUser != null) tp = doc.getTranscriptionPage(pid, preferUser, params.versionHash); if (tp == null) tp = doc.getTranscriptionPage(pid, targetUser, params.versionHash, includeEndorsed); Page p = Page.getPage(doc.getDocumentID(), pid); lastPage = p; params.getLogger().debug("page: " + pid); if (p != null) { if (p.getVerses().length > 0) testament = p.getVerses()[0]/1000000000; v11n = p.getV11n(); } if (tp != null) { params.getLogger().debug("page: " + pid); // if we're requesting full page, or we have more than one page, let's strip any headers // added by the editor if (fullPage || pageIDs.size() > 1) { int start = tp.indexOf(""); int end = tp.indexOf(""); if (start < 0 && end < 0 && tp.startsWith("") - 5; end = tp.length(); } params.getLogger().debug("start / end: " + start + " / " + end); if (start > -1 && end > -1) tp = tp.substring(start+6, end); // what the frick was this for? It's messing things up now so I am removing it // else if (tp.startsWith(" <% for (VersionedRepo.History h : doc.getTranscriptionPageHistory(Integer.parseInt(pageID), targetUser, params.allUsers)) { out.print(h.toFormattedXML()); } %> <% return; } } else { VerseKey vk = new VerseKey(); vk.setIntros(true); if (v11n != null) vk.setVersificationSystem(v11n); //params.getLogger().debug("v11n: " + v11n); String savedIndexContent = null; int targetVerse = 0; if ("chapter".equals(format)) { vk.setText(params.indexContent); savedIndexContent = params.indexContent; int ch = vk.getChapter(); int vs = vk.getVerse(); if (ch == 0) ch = 1; else if (ch == 1 && vs == 0) { vk.setPosition(VerseKey.MAXCHAPTER); vk.setVerse(0); ch = vk.getChapter(); } params.indexContent = vk.getBookName() + "." 
+ ch; targetVerse = vk.getVerse(); } ListKey verses = vk.ParseVerseList(params.indexContent, "Mat.1.1", true); if (verses != null && verses.getElement() != null) { if ("chapter".equals(format)) { params.indexContent = savedIndexContent; retVal.append(""); verses.setPosition(VerseKey.BOTTOM); VerseKey ub = ((VerseKey)verses.getElement()).getUpperBound(); VerseKey bookLast = (VerseKey)ub.clone(); bookLast.setPosition(VerseKey.MAXCHAPTER); bookLast.setPosition(VerseKey.MAXVERSE); if (ub.equals(bookLast)) { ub.setIntros(true); ub.setVerse(0); ub.setChapter(1); verses.add(ub); verses.setPosition(VerseKey.BOTTOM); int ch = ((VerseKey)verses.getElement()).getChapter(); int vs = ((VerseKey)verses.getElement()).getVerse(); //params.getLogger().info("adding subscriptio: " + ch + ":" + vs); } verses.setPosition(VerseKey.TOP); if (((VerseKey)verses.getElement(0)).getLowerBound().getChapter() == 1) { VerseKey lb = ((VerseKey)verses.getElement(0)).getLowerBound(); lb.setIntros(true); lb.setVerse(0); lb.setChapter(0); ((VerseKey)verses.getElement(0)).setLowerBound(lb); verses.setPosition(VerseKey.TOP); int ch = ((VerseKey)verses.getElement(0)).getChapter(); //params.getLogger().info("setting lowerbound to chapter 0: " + ch); } } verses.setPosition(VerseKey.TOP); VerseKey evk = (VerseKey)verses.getElement(); testament = evk.getTestament(); int book = evk.getBook(); int chapter = evk.getChapter(); String subscriptio = ""; //params.getLogger().info("chapter: " + chapter); t += "
"; if (chapter == 0) { t += "
"; } else { t += "
"; } for (verses.setPosition(VerseKey.TOP); verses.popError() == 0; verses.increment()) { VerseKey verseKey = (VerseKey)verses.getElement(); //params.getLogger().info("verseKey: " + verseKey); int verse = verseKey.getVerse(); if (verseKey.getChapter() == 1 && verseKey.getVerse() == 0) { subscriptio = "
"; String vt = null; if (preferUser != null) vt = doc.getTranscriptionVerse(verseKey.getHashNumber(), preferUser, false); if (vt == null) vt = doc.getTranscriptionVerse(verseKey.getHashNumber(), targetUser, !params.includeEmpty, includeEndorsed); if ((verseKey.getVerse() == 0 || verseKey.getChapter() == 0 || verseKey.getBook() == 0) && "".equals(vt)) {} else subscriptio += vt; subscriptio += "
"; if (verses.getElement() == verses.getElement(0) || !"chapter".equals(params.format)) continue; } if (book != verseKey.getBook()) { t += "
"; t += subscriptio; subscriptio = ""; } if (book != verseKey.getBook() || chapter != verseKey.getChapter()) { t += "
"; } if (book != verseKey.getBook()) { book = verseKey.getBook(); t += "
"; chapter = verseKey.getChapter(); if (chapter == 0) { t += "
"; } else { t += "
"; } } if (book != verseKey.getBook() || chapter != verseKey.getChapter()) { chapter = verseKey.getChapter(); if (chapter == 0) { t += "
"; } else { t += "
"; } } String vt = null; if (preferUser != null) vt = doc.getTranscriptionVerse(verseKey.getHashNumber(), preferUser, false); //params.getLogger().info("preferUser Transcription: " + doc.getDocumentIDString() + " : " + preferUser + " : " + vt); if (vt == null) vt = doc.getTranscriptionVerse(verseKey.getHashNumber(), targetUser, !params.includeEmpty, includeEndorsed); //params.getLogger().info("targetUser Transcription: " + targetUser + " : " + vt); params.getLogger().debug("Transcription raw (" + verseKey + "): " + vt); if (vt != null) { vt = vt.replaceAll("]*>", ""); vt = vt.replaceAll("
", ""); } params.getLogger().debug("Transcription cleaned (" + verseKey + "): " + vt); if ((verseKey.getVerse() == 0 || verseKey.getChapter() == 0 || verseKey.getBook() == 0) && "".equals(vt)) {} else { if (vt != null && vt.trim().length() > 0) t += vt; else if (params.includeEmpty && verse > 0) t +=String.format(" ", book, chapter, verse); } if ("chapter".equals(format)) { String verseText = Convert.getTEIDisplayHTML(vt, testament, v11n, lastPage != null && lastPage.getVerses().length > 0 ? new HashSet(Arrays.asList(lastPage.getVerses())) : null, true); verseText = verseText.replaceAll("]*class=\"verse_number\">[^<]*", ""); verseText = verseText.replaceAll(" *]*class=\"punctuation\">([^<]*)", "$1"); verseText = verseText.replaceAll("]*class=\"abbr[^>]*>([^<]*)", "$1"); retVal.append("
\n"); } } t += ""; t += subscriptio; subscriptio = ""; t += ""; if ("chapter".equals(format)) { params.indexContent = savedIndexContent; retVal.append("
" + verseText + "
"); } } } if (t == null) t = ""; if ("html".equals(format) || "htmlfragment".equals(format) || "xhtml".equals(format)) { //params.getLogger().info("t: " + t); retVal.append(t != null && t.length() > 0 ? Convert.getTEIDisplayHTML(t, testament, v11n, lastPage != null && lastPage.getVerses().length > 0 ? new HashSet(Arrays.asList(lastPage.getVerses())) : null) : ("No Transcription Available for GA: "+ doc.getGANumber() + ((pageID!=null)?(" page: "+pageID) : (" indexContent: "+ params.indexContent)))); } else if (format != null && format.startsWith("transtext")) { //params.getLogger().info("t: " + t); retVal.append(""); if (t != null && t.length() > 0) { boolean first = true; try { for (WitnessReading wr : Convert.getTEITranscriptionText(t, supplied, unclear, punct, accents)) { if (!first) retVal.append("|"); String siglum = wr.getDocumentName(); siglum = (siglum == null || siglum.length() < 0) ? wr.getHand() : siglum + (wr.getHand().length() > 0 ? "-":"") + wr.getHand(); retVal.append(siglum + "|" + HTTPUtils.canonize(wr.getText())); first = false; } } catch (Exception e) { if (!first) retVal.append("|"); retVal.append(""+doc.getDocumentIDString() + "-ERROR|Error in transcription: " + HTTPUtils.canonize(e.toString())); } } else { retVal.append(""); } retVal.append(""); } else if (format != null && format.startsWith("wce")) { // What should we do if we don't have any transcription? // Well, let's set it to {lac} if (t == null || t.trim().length() == 0) t = "{lac}"; String l = doc.getLanguage(); if (l.indexOf("g") > -1) l = "grc"; JSONObject o = new JSONObject(); JSONArray witnesses = new JSONArray(); StringBuffer allABs = new StringBuffer(t); t = Transcription.popElement(allABs, "ab"); if (t == null) t = allABs.toString(); boolean multiInstance = false; int duplicatePosition = 0; while (t != null) { String nextT = Transcription.popElement(allABs, "ab"); if (nextT != null) multiInstance = true; if (t != null && t.length() > 0) { o = new JSONObject(); witnesses = new JSONArray(); String instanceSuffix = (multiInstance) ? instanceSuffix = "+" + Transcription.getAttribute(t, "xml:id") : ""; String siglum = doc.getGANumber()+instanceSuffix; o.put("tei", t); o.put("_id", doc.getDocumentIDString()+instanceSuffix); o.put("transcription", doc.getDocumentIDString()); // o.put("transcription_id", doc.getDocumentIDString()); if (multiInstance) { // o.put("duplicate_position", Integer.toString(++duplicatePosition)); // o.put("index", Integer.toString(duplicatePosition-1)); } o.put("language", l); o.put("siglum", siglum); o.put("context", params.indexContent); o.put("transcription_siglum", doc.getGANumber()); o.put("document_id", doc.getDocumentIDString()); for (WitnessReading wr : Convert.getTEIWCEText(t, true, true, false, accents, (format.endsWith("_new") ? 
"\u0323" : "_"))) { wr.setDocumentName(siglum); wr.setDocumentIDString(lastDoc.getDocumentIDString()); //params.getLogger().info("WR: " + wr); JSONObject w = new JSONObject(); boolean anyGap = false; JSONArray tokens = new JSONArray(); int index = 2; // {om} means we want-- tokens : [] if (!"{lac}".equals(wr.getText()) && (!"{om}".equals(wr.getText()))) { String gapDetails = null; JSONObject lastToken = null; String tokensStrings[] = wr.getTokens(); //params.getLogger().info("tokens: " + Arrays.toString(tokensStrings)); for (String tokenText : tokensStrings) { //params.getLogger().info("token: *"+tokenText+"*"); String extent2 = null; String unit2 = null; String reason2 = null; JSONObject token = new JSONObject(); // handle tokens with {gap...} int begin = tokenText.indexOf("{gap"); //params.getLogger().info("begin: " + begin); if (begin > -1) { anyGap = true; int end = tokenText.indexOf('}', begin); //params.getLogger().info("tokenText: " + tokenText); //params.getLogger().info("end: " + end); String tag = tokenText.substring(begin, end); String extent = Transcription.getAttribute(tag, "extent"); String unit = Transcription.getAttribute(tag, "unit"); String reason = Transcription.getAttribute(tag, "reason"); // check for a second {gap} to see if our first word is a final portion int begin2 = tokenText.indexOf("{gap", end); //params.getLogger().info("begin: " + begin); if (begin2 > -1) { int end2 = tokenText.indexOf('}', begin2); if (end2 > -1) { String tag2 = tokenText.substring(begin2, end2); extent2 = Transcription.getAttribute(tag2, "extent"); unit2 = Transcription.getAttribute(tag2, "unit"); reason2 = Transcription.getAttribute(tag2, "reason"); tokenText = tokenText.substring(0, begin2) + tokenText.substring(end2+1); } } String gDetails = (reason == null|| reason.trim().length() == 0)?"gap":reason; gDetails += (extent == null || extent.trim().length() == 0)?"":(" " + extent); gDetails += (unit == null || unit.trim().length() == 0)?"":(" " + unit); //params.getLogger().info("gapDetails: " + gapDetails); // if we're entirely a gap if (begin == 0 && end == (tokenText.length()-1)) { // are we first? 
save our gap details for first available token if (lastToken == null) { gapDetails = gDetails; continue; } else { lastToken.put("gap_after", true); lastToken.put("gap_details", gDetails); } continue; } else { //params.getLogger().info("We're not entirely a gap."); // we're at the end of a token if (end == tokenText.length()-1) { //params.getLogger().info("We're at the end."); token.put("gap_after", true); token.put("gap_after_details", gDetails); token.put("gap_details", gDetails); } // we're at the beginning of a token else if (begin == 0) { //params.getLogger().info("We're at the beginning."); token.put("gap_before", true); token.put("gap_before_details", gDetails); token.put("gap_details", gDetails); } // we're in the middle of a token :( else { token.put("gap_details", gDetails); } String firstHalf = tokenText.substring(0,begin); String lastHalf = tokenText.substring(end+1); if ("char".equals(unit)) { tokenText = firstHalf + "["+extent+"]"+ lastHalf; } tokenText = firstHalf+lastHalf; } } token.put("index", Integer.toString(index)); if (gapDetails != null) { token.put("gap_before", true); token.put("gap_before_details", gapDetails); gapDetails = null; } if (tokenText.indexOf("|ns") > -1) { tokenText = tokenText.replace("|ns", ""); tokenText = tokenText.replace("\u0305", ""); token.put("nomSac", true); } if (tokenText.indexOf("_") > -1 || tokenText.indexOf("\u0323") > -1) token.put("unclear", true); if ((tokenText.indexOf("[") > -1) || (tokenText.indexOf("]") > -1)) token.put("supplied", true); //params.getLogger().info("token: *"+tokenText+"*"); //params.getLogger().info("lastChar: " + tokenText.charAt(tokenText.length()-1)); if (tokenText.length() > 0 && Transcription.isPunctuation(tokenText.charAt(tokenText.length()-1))) { //params.getLogger().info("found PC"); token.put("pc_after", "" + tokenText.charAt(tokenText.length()-1)); tokenText = tokenText.substring(0, tokenText.length()-1); if (tokenText.length() > 0 && Transcription.isPunctuation(tokenText.charAt(tokenText.length()-1))) { //params.getLogger().info("found PC"); token.put("pc_after", "" + tokenText.charAt(tokenText.length()-1) + token.get("pc_after").toString()); tokenText = tokenText.substring(0, tokenText.length()-1); } } token.put("siglum", siglum); token.put("verse", params.indexContent); // remove []_ and underdot from our token for collation //params.getLogger().info("token: *"+tokenText+"*"); String unaccented = tokenText; if ("word".equals(unit2) && "final portion".equals(extent2)) { unaccented = "[...]" + unaccented; } if (!accents) { StringBuffer trans = new StringBuffer(unaccented); removeAccents.processText(trans, null, null); unaccented = trans.toString(); } String cleanedToken = unaccented.replaceAll("[\\[\\]_\u0323]", ""); //params.getLogger().info("cleanedToken: *"+cleanedToken+"*"); if (cleanedToken == null || cleanedToken.trim().length() < 1 || unaccented == null || unaccented.trim().length() < 1) { cleanedToken="SOMETHINGISBROKENHERE"; unaccented="SOMETHINGISBROKENHERE"; } token.put("t", cleanedToken); JSONArray ruleMatch = new JSONArray(); ruleMatch.put(unaccented); token.put("rule_match", ruleMatch); token.put("reading", wr.getLabel()); token.put("original", unaccented); tokens.put(token); lastToken = token; index += 2; } } w.put("tokens", tokens); w.put("id", wr.getLabel()); // {lac} means we want-- nothing for the entire witness // here no witnesses, and below if !witnesses.length, no output // is this true? 
params.getLogger().info("Witness: " + w.toString() +"; tokens.size: " + tokens.length()); if (!"{lac}".equals(wr.getText()) && ((tokens != null && tokens.length() > 0) || !anyGap)) { witnesses.put(w); } } o.put("witnesses", witnesses); // if we have no witnesses then we are completely {lac} // and should not output anything // is this true? if (witnesses.length() > 0) { if (retVal.length() > 0) retVal.append(", "); retVal.append(o.toString()); } } t = nextT; } } else if ("plaintext".equals(format)) { if (t != null && t.length() > 0) { retVal.append(Convert.getTEIPlainText(t)); } else { retVal.append("No Transcription Available for GA: "+ doc.getGANumber() + ((pageID!=null)?(" page: "+pageID) : (" indexContent: "+ params.indexContent))); } } // TEI and wce else if (!"chapter".equals(format)) { if (t.length() > 5) { retVal.append(t); } else { retVal.append(""); } } } else { retVal.append(""); } } String contributorAttribution = ""; if (lastDoc != null) { Feature f = lastDoc.getFeature("TranscriptionTranscribers"); if (f != null) contributorAttribution += "Transcribers: " + f.getStringVal1(); f = lastDoc.getFeature("TranscriptionRevisors"); if (f != null) contributorAttribution += (contributorAttribution.length() > 0 ? "; " : "") + "Revisors: " + f.getStringVal1(); f = lastDoc.getFeature("TranscriptionCollaborators"); if (f != null) contributorAttribution += (contributorAttribution.length() > 0 ? "; " : "") + "Collaborators: " + f.getStringVal1(); f = lastDoc.getFeature("TranscriptionExtraAttribution"); if (f != null) contributorAttribution += (contributorAttribution.length() > 0 ? "
" : "") + f.getStringVal1(); } if (lastPage != null) { String folioPrefix = lastPage.getFolioNumber(); if (folioPrefix == null || folioPrefix.trim().length() < 1) folioPrefix = ""; else folioPrefix = "Page " + folioPrefix.trim() + " "; Feature f = lastPage.getFeature("TranscriptionEditors"); if (f != null) contributorAttribution += folioPrefix + "Editors: " + f.getStringVal1(); f = lastPage.getFeature("TranscriptionRevisors"); if (f != null) contributorAttribution += (contributorAttribution.length() > 0 ? "; " : "") + folioPrefix + "Revisors: " + f.getStringVal1(); f = lastPage.getFeature("TranscriptionCollaborators"); if (f != null) contributorAttribution += (contributorAttribution.length() > 0 ? "; " : "") + folioPrefix + "Collaborators: " + f.getStringVal1(); f = lastPage.getFeature("TranscriptionExtraAttribution"); if (f != null) contributorAttribution += (contributorAttribution.length() > 0 ? "
" : "") + folioPrefix + f.getStringVal1(); } StringBuffer copyrightBlock = new StringBuffer(); if (params.getCopyright) { copyrightBlock.append(""); copyrightBlock.append("" + HTTPUtils.canonize(transcriptionAttributionDefault) + ""); copyrightBlock.append("" + HTTPUtils.canonize(transcriptionAttribution) + ""); copyrightBlock.append("" + HTTPUtils.canonize(transcriptionUsage) + ""); copyrightBlock.append("" + HTTPUtils.canonize(contributorAttribution) + ""); copyrightBlock.append(""); } if (baseText != null) { String t = loadModuleReading(baseText, params.indexContent).getText(); String l = "grc"; retVal.append(""); retVal.append("|" + HTTPUtils.canonize(t)); retVal.append(""); } params.getLogger().debug("***** transcript/get ************** finised with result buffer size: " + retVal.length()); if (!params.getCopyright && retVal.toString().startsWith(" <% } else { response.setContentType("text/plain"); } %> <% if (!"htmlfragment".equals(format)) { %> <% } %>
dir="rtl" <% } %> > <%= retVal %>




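<%-- Format dispatch below: "chapter" returns plain text keyed by the verse reference, "wce" formats return a JSON array of witness objects, "transtext" returns XML, "plaintext" returns stripped text, and everything else is served as TEI XML, with header details generated when none are present (unless format is "teiraw"). --%>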
<% if (!"htmlfragment".equals(format)) { %> <% } } else if ("chapter".equals(format)) { response.setContentType("text/plain"); VerseKey vk = new VerseKey(); vk.setIntros(true); if (v11n != null) vk.setVersificationSystem(v11n); vk.setText(params.indexContent); out.print(vk.getShortText()+"%%%"); out.print(retVal); } else if (format != null && format.startsWith("wce")) { response.setContentType("application/json"); out.print("["); out.print(retVal); out.print("]"); } else if (format != null && format.startsWith("transtext")) { response.setContentType("text/xml"); String vtext = ""; if (params.indexContent != null) { VerseKey vk = new VerseKey(); vk.setIntros(true); if (v11n != null) vk.setVersificationSystem(v11n); vk.setText(params.indexContent); vtext = " indexContent=\""+vk.getBKVText()+"\""; } %><%=baseText!=null?" baseText=\""+baseText+"\"":""%>><%= retVal %><% } else if ("plaintext".equals(format)) { response.setContentType("text/plain"); %><%= retVal %><% } // teiraw //else if ("teiraw".equals(format)) { else { response.setContentType("text/xml"); boolean headerPresent = (retVal.indexOf(" -1); boolean richHeaderPresent = (retVal.indexOf("") > -1); // if (params.briefTEIHeader && headerPresent) { // always get rid of header if it's not likely a full header //params.getLogger().info("headerPresent: " + headerPresent + "; richHeaderPresent: " + richHeaderPresent); //params.getLogger().info("transcription: " + retVal); if (headerPresent && !richHeaderPresent) { int endHeaderOffset = retVal.indexOf(""); if (endHeaderOffset > -1) retVal = new StringBuffer(retVal.substring(endHeaderOffset+6)); endHeaderOffset = retVal.lastIndexOf(""); if (endHeaderOffset > -1) retVal = new StringBuffer(retVal.substring(0, endHeaderOffset)); headerPresent = false; } // why are we only adding a header if we don't start with '")+1).trim()); } %> <% if (!"teiraw".equals(format)) { %> <% } if (!headerPresent) { Set wits= new HashSet(); for (WitnessReading wr : Convert.getTEITranscriptionText(retVal.toString(), true, true, true, true)) { wr.setDocumentName(lastDoc.getGANumber()+wr.getDocumentName()); wr.setDocumentIDString(lastDoc.getDocumentIDString()); wits.add(wr.getHand()); } %> <% if (!params.briefTEIHeader) { %> <%=lastDoc.getGANumber()%> The Institut für neutestamentliche Textforschung

]]>

]]>

]]>

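<%-- One entry per shelf instance of the manuscript: country, place, holding institution, and shelf number are emitted into the generated TEI header. --%>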
<% for (ShelfInstance si : lastDoc.getShelfInstances()) { %> <%=si.getInstitutionCountry()%> <%=si.getInstitutionPlace()%> <%=HTTPUtils.canonize(si.getInstitutionName())%> <%=si.getShelfNumber()%> <% } String l = lastDoc.getLanguage(); if ("g".equals(l)) l = "grc"; %>
<% PriorityQueue<String> wits2 = new PriorityQueue<String>(wits); while (!wits2.isEmpty()) { String w = wits2.poll(); if ("firsthand".equals(w) || w.length() < 1) continue; %> <% } %>
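<%-- When filterNoise is set, the assembled output is post-processed below: markup is stripped via replaceAll, accents are removed with the GreekAccents filter (removeAccents), and punctuation is dropped; Transcription.fixTranscription is then applied to the result in all cases. --%>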
<% } %> > <% } if (params.filterNoise) { retVal = new StringBuffer(retVal.toString() .replaceAll("", "") .replaceAll("]*>", "") .replaceAll("", "") ); removeAccents.processText(retVal, null, null); retVal = new StringBuffer(Transcription.stripPunctuation(retVal.toString(), true)); /* Vector wr = Convert.getTEITranscriptionText(retVal.toString(), false, false, false, false); if (wr != null && wr.size() > 0) { retVal = new StringBuffer(wr.get(0).getText() .replaceAll("\\[([^-])", "$1") .replaceAll("([^-])\\]", "$1")); } */ } // cleanups for transcription output retVal = new StringBuffer(Transcription.fixTranscription(retVal.toString())); %> <% } %> <%= retVal %> <% if (!retVal.toString().trim().startsWith("
<% } } } } else { ((Parameters)params).format = "html"; Serializer.reportErrors(request, response, out, params, true); } %>