<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%> <%@ page trimDirectiveWhitespaces="true" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Document" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Transcription" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Page" %> <%@ page import="org.crosswire.sword.keys.VerseKey" %> <%@ page import="org.crosswire.sword.keys.ListKey" %> <%@ page import="org.crosswire.utils.Utils" %> <%@ page import="org.crosswire.utils.HTTPUtils" %> <%@ page import="java.io.File" %> <%@ page import="java.util.Date" %> <%@ page import="java.util.Set" %> <%@ page import="java.util.HashSet" %> <%@ page import="java.util.Arrays" %> <%@ page import="java.util.Collections" %> <%@ page import="java.util.Map" %> <%@ page import="java.util.TreeMap" %> <%@ page import="java.util.HashMap" %> <%@ page import="java.util.List" %> <%@ page import="java.util.ArrayList" %> <%@ page import="java.util.Comparator" %> <%@ page import="java.io.FilenameFilter" %> <%@ page import="java.io.FileFilter" %> <%@ page import="org.crosswire.repo.VersionedRepo" %> <%@ page import="javax.validation.constraints.Pattern" %> <%@ page import="org.crosswire.webtools.annotation.*" %> <%@ page import="org.crosswire.webtools.*" %> <%@ page import="org.apache.commons.codec.digest.DigestUtils" %> <%! static HashMap specialFonts = new HashMap(); static HashSet rightJustify = new HashSet(); static { specialFonts.put("bo", "Antinoou, AntinoouWeb"); specialFonts.put("sa", "Antinoou, AntinoouWeb"); specialFonts.put("fa", "Antinoou, AntinoouWeb"); specialFonts.put("mae", "Antinoou, AntinoouWeb"); specialFonts.put("ly", "Antinoou, AntinoouWeb"); specialFonts.put("cw", "Antinoou, AntinoouWeb"); specialFonts.put("syc", "Estrangelo Edessa, EstreWeb"); specialFonts.put("chu", "BukyvedeWeb"); rightJustify.add("syc"); rightJustify.add("he"); rightJustify.add("arb"); } static final int MATCH_EXCERPT_CONTEXT = 40; static final java.util.regex.Pattern lbPattern = java.util.regex.Pattern.compile(" { protected int minDocID = -1; protected int maxDocID = -1; protected int minPageID = -1; protected int maxPageID = -1; protected Set confirmStrings = new HashSet(); protected java.util.regex.Pattern textPattern = null; @Description(value = "Limit results to user who owns the transcription. Use \"ENDORSEDPROJECTS\" to search all transcriptions owned by projects endorsed in sysconfic.properties: TranscriptionIncludeProjects", example = "jsmith") public String userName = null; @Description(value = "Limit results to a docID. Can also be a range", example = "91035-91037") @Pattern(regexp = "^[0-9]*(-[0-9]*)?$", message = "Please give a docID or docID range with min and max values separated by a - (dash).") public String docID = null; @Description(value = "Limit results to a pageID. Can also be a range", example = "10-550") @Pattern(regexp = "^[0-9]*(-[0-9]*)?$", message = "Please give a pageID or pageID range with min and max values separated by a - (dash).") public String pageID = null; @Description(value = "Limit results to a indexContent", example = "Ps.90.1") public String indexContent = null; @Description(value = "Limit results to pages which contain a regular expression text pattern in the TEI data", example = "(]*>)\\.()") public String text; @Description(value = "Same as {text} but will be converted from iso8859-1 to utf-8", example = "(]*>)\\.()") public String textConvert; @Description(value = "Replacement string to substitute for matches to above {text} parameter. Parenthesized submatches can be specified with $n, e.g., $1 to represent the first parenthesis match from the {term} regular expression.", example = "$1;$2") public String replace; @Description(value = "Same as {replace} but will be converted from iso8859-1 to utf-8", example = "$1;$2") public String replaceConvert; @Description(value = "Perform an update of the transcription repository at the point specified by the confirm string specified. This string is obtained from the 'confirm' attribute on the replace element of a match. Can be specified multiple times", example = "d1b6f246c1c34ec2a8c9a81dcd84aa40") public String confirm; @Description(value = "detail of results: document, page", defaultValue = "document") public String detail; @Override protected void afterLoad() { } @Override protected void customValidation() { if (docID != null) { String range[] = docID.split("-"); minDocID = Integer.parseInt(range[0]); maxDocID = Integer.parseInt(range[range.length-1]); } if (pageID != null) { String range[] = pageID.split("-"); minPageID = Integer.parseInt(range[0]); maxPageID = Integer.parseInt(range[range.length-1]); } if (minDocID != -1 && minPageID == -1 && indexContent != null && indexContent.trim().length() > 0) { Document.SearchFilter searchFilter = new Document.SearchFilter(); searchFilter.docID = minDocID; searchFilter.biblicalContent = indexContent; Document docs[] = Document.searchDocuments(true, searchFilter); //getLogger().info("searchFilter: " + searchFilter); //getLogger().info("docs.length: " + docs.length); if (docs != null && docs.length == 1) { //getLogger().info("docs[0].getPages()length: " + docs[0].getPages().length); for (Page p : docs[0].getPages()) { if (minPageID == -1 || p.getPageID() < minPageID) minPageID = p.getPageID(); if (maxPageID == -1 || p.getPageID() > maxPageID) maxPageID = p.getPageID(); } } //getLogger().info("minPageID: " + minPageID); //getLogger().info("maxPageID: " + maxPageID); } if (userName == null && minDocID == -1 && detail == null) { addError(-1, "Must specify one of: docID, or userName"); } if (textConvert != null) { text = Transcription.assureUnicode(textConvert); } if (text != null) { textPattern = java.util.regex.Pattern.compile(text); } if (replaceConvert != null) { replace = Transcription.assureUnicode(replaceConvert); } String confirms[] = request.getParameterValues("confirm"); if (confirms != null) confirmStrings.addAll(Arrays.asList(confirms)); if (detail == null) detail = "document"; } @Override protected boolean permissionValidation(HttpServletRequest request, HttpServletResponse response) { return true; } } %> <% MyParameters params = new MyParameters().loadFromRequest(request, response); if (params.getErrors().size() == 0) { String transcriptionPublishedName = Utils.getSysConfig(session).getProperty("TranscriptionPublishedName", "PUBLISHED"); String defaultFontSpec = Utils.getSysConfig(session).getProperty("DefaultFontSpec", "GentiumPlus"); String committingUserName = params.getUser() != null ? params.getUser().getUserName() : null; if (committingUserName == null) committingUserName = ""; List docs = null; response.setContentType("text/xml"); boolean pagesSaved = false; ListKey verseRangeVK = null; if (params.indexContent != null) { verseRangeVK = new VerseKey().ParseVerseList(params.indexContent, "Mat.1.1", true); } // --------------------------------------------- String searchString = ""; Set endorsedProjects = null; if ("ENDORSEDPROJECTS".equals(params.userName)) { String transcriptionIncludeProjects = Utils.getSysConfig(session).getProperty("TranscriptionIncludeProjects", ""); endorsedProjects = new HashSet(Arrays.asList(transcriptionIncludeProjects.split("\\|"))); params.getLogger().info(endorsedProjects); } // let's check to see if we're a privileged user boolean isAdmin = params.getUser() != null && ( params.getUser().hasRole("VMR Administrator") || params.getUser().hasRole("Transcription Manager") || params.getUser().hasRole("Transcription Publisher")); // searching for transcriptions of a particular docID if (params.minDocID != -1) { // searching for transcriptions of a particular docID+pageID Document doc = Document.getDocument(params.minDocID); if (doc != null) { List paths = new ArrayList(); // TODO: fold endorsedProjects logic into the else below, but for now this is fast and works if (endorsedProjects != null) { for (String user : endorsedProjects) { if (doc.isTranscriptionPageExists(params.minPageID, user)) { paths.add(new File(Integer.toString(params.minDocID)+"/initial/"+user+"/"+params.minPageID+".xml")); } } } else { String path = Integer.toString(params.minDocID); File potential = null; // check for PUBLISHED transcription // yeah, so, since we get the username below from the 'initial/ portion, we hack this :( // NB: we are not using 'path' in the check for exists here if (params.userName == null || "PUBLISHED".equals(params.userName)) { if (params.minPageID != -1 && params.maxPageID == -1) { potential = new File(path + "/initial/PUBLISHED/" + Integer.toString(params.minPageID) + ".xml"); if (doc.isTranscriptionPageExists(params.minPageID, null)) { paths.add(potential); } } else { for (File p : VersionedRepo.search(path + "/*.xml")) { try { int pID = Integer.parseInt(p.getName().substring(0, p.getName().indexOf("."))); if (pID >= params.minPageID && (pID <= params.maxPageID || params.maxPageID == -1)) { potential = new File(path + "/initial/PUBLISHED/" + Integer.toString(pID) + ".xml"); paths.add(potential); } } catch (Exception e) { e.printStackTrace(); } } } } // end check for PUBLISHED path += "/initial/"; // check for user transcriptions File[] users = null; if (params.userName != null) { users = new File[1]; users[0] = new File(params.userName); } else users = VersionedRepo.getFolders(path); //params.getLogger().info("user: " + users); for (File f : users) { if (params.minPageID != -1) { potential = new File(f, Integer.toString(params.minPageID) + ".xml"); if (potential.exists()) { paths.add(potential); } } else { for (File p : VersionedRepo.search(path + f.getName() + "/*.xml")) { try { int pID = Integer.parseInt(p.getName().substring(0, p.getName().indexOf("."))); paths.add(p); } catch (Exception e) { e.printStackTrace(); } } } } } docs = paths; } } else if (params.userName != null) { searchString = "PUBLISHED".equals(params.userName) ? "*/*.xml" : ("initial/"+params.userName); } else searchString = "initial/*/*.xml"; StringBuffer retVal = new StringBuffer(); if (docs == null) { docs = new ArrayList(); Collections.addAll(docs, VersionedRepo.search(searchString)); } docs.add(new File("EOF")); retVal.append(""); String inDocID = null; String inDocIDUserName = null; Map inDocIDPages = null; //params.getLogger().info("Searching docs.length: : " + docs.size()); for (File f : docs) { //params.getLogger().info("Searching file: " + f.getAbsolutePath()); try { String documentID = null; String pageCount = null; String userName = null; if (!"EOF".equals(f.getName())) { if (f.isDirectory()) { documentID = f.getParentFile().getParentFile().getName(); pageCount = Integer.toString(f.list(new FilenameFilter() {public boolean accept(File dir, String name) { return name.endsWith(".xml"); } }).length); userName = params.userName; } else { pageCount = "1"; if ("initial".equals(f.getParentFile().getParentFile().getName())) { documentID = f.getParentFile().getParentFile().getParentFile().getName(); userName = f.getParentFile().getName(); // because */*.xml search string recurses and finds user transcriptions // we should probably make an option to search for only published transcriptions if ("PUBLISHED".equals(params.userName) && !"PUBLISHED".equals(userName)) continue; } else { documentID = f.getParentFile().getName(); userName = "PUBLISHED"; } if (endorsedProjects != null && !endorsedProjects.contains(userName)) { continue; } } } if (inDocID != null && (!inDocID.equals(documentID) || !inDocIDUserName.equals(userName))) { retVal.append(""); for (String idip : inDocIDPages.values()) { retVal.append(idip); } retVal.append(""); } else retVal.append("/>"); inDocID = null; inDocIDUserName = null; inDocIDPages = null; } if (inDocID == null && !f.isDirectory()) { inDocID = documentID; inDocIDUserName = userName; inDocIDPages = new TreeMap(); } if ("EOF".equals(f.getName())) continue; if (inDocID != null) { String pID = f.getName().substring(0, f.getName().lastIndexOf(".")); boolean add = true; java.util.regex.Matcher matcher = null; String transcriptionBody = ""; if (params.textPattern != null) { transcriptionBody = Page.getTranscription(Integer.parseInt(documentID), Integer.parseInt(pID), !"PUBLISHED".equals(userName) ? userName : null, null, false); if (transcriptionBody == null) transcriptionBody = ""; matcher = params.textPattern.matcher(transcriptionBody); add = matcher.find(); //params.getLogger().info("transcriptionBody.size: " + transcriptionBody.length() + "; matches: " + add); } if (add) { String pageText = " transcriptionBody.length()) end = transcriptionBody.length(); match += ""; String contextPre = HTTPUtils.canonize(transcriptionBody.substring(start, matcher.start())); match += contextPre; match += ""; match += ""; String contextPost = HTTPUtils.canonize(transcriptionBody.substring(matcher.end(), end)); match += contextPost; match += ""; String verseAttribute = ""; int previousABIndex = transcriptionBody.lastIndexOf(" -1) { String verse = Transcription.getAttribute(transcriptionBody.substring(previousABIndex), "n"); if (verse != null) { vk = new VerseKey(verse); verseAttribute = " verse=\""+vk.getShortText()+"\""; } } match += ""; match += HTTPUtils.canonize(transcriptionBody.substring(matcher.start(), matcher.end())); match += ""; if (params.replace != null) { matcher.appendReplacement(transcriptionBodyUpdated, params.replace); String replace = HTTPUtils.canonize(transcriptionBodyUpdated.substring(matcher.start() - lastEnd)); String confirm = DigestUtils.md5Hex("$1$vmrcre$" + documentID + pID + contextPre + replace + contextPost); match += ""; match += replace; match += ""; if (params.confirmStrings.contains(confirm)) { transcriptionBodyToSave.append(transcriptionBodyUpdated); savePage = true; } else { transcriptionBodyToSave.append(transcriptionBody.substring(lastEnd, matcher.end())); } } match += ""; if (savePage) matchHeader += " replaced=\"true\""; if (verseRangeVK == null || vk == null) { pageText += matchHeader + ">" + match; } else { if (verseRangeVK.contains(vk)) { pageText += matchHeader + ">" + match; } // else params.getLogger().info("Not in verse range: " + vk.getOSISRef() + " !in " + verseRangeVK.getRangeText()); } lastEnd = matcher.end(); } while (matcher.find()); pageText += ""; matcher.appendTail(transcriptionBodyToSave); if (savePage) { if (committingUserName.length() < 1) { params.addError(-6, "Permission denied."); break; } // check if we're saving a site transcription boolean siteAdmin = params.getUser() != null && params.getUser().hasRole("Site Administrator", userName); // userName will be SiteName if (!isAdmin && "PUBLISHED".equals(userName)) { params.addError(-6, "Must be a VMR Administrator or Transcription Manager to publish global transcriptions."); break; } if (!isAdmin && !committingUserName.equals(userName) && !siteAdmin) { params.addError(-6, "Must be a VMR Administrator to edit another user's transcription."); break; } Document doc = Document.getDocument(Integer.parseInt(documentID)); doc.setTranscriptionPage(Integer.parseInt(pID), transcriptionBodyToSave.toString(), !"PUBLISHED".equals(userName) ? userName : null, true, false, false, committingUserName.length() > 0 ? committingUserName : null); pagesSaved = true; } } else pageText += "/>"; //params.getLogger().info("pageText: " + pageText); inDocIDPages.put(Integer.parseInt(pID), pageText); //params.getLogger().info("Adding page. count: " + inDocIDPages.size()); } continue; } retVal.append(""); File pages[] = null; if (f.isDirectory()) { pages = f.listFiles(new FileFilter() { public boolean accept(File path) { return path.getName().endsWith(".xml"); } }); } else { pages = new File[1]; pages[0] = f; } Arrays.sort(pages, new Comparator() { public int compare(File o1, File o2) { Integer pID1 = 0; try { pID1 = Integer.parseInt(o1.getName().substring(0, o1.getName().lastIndexOf("."))); } catch (Exception e) {} Integer pID2 = 0; try { pID2 = Integer.parseInt(o2.getName().substring(0, o2.getName().lastIndexOf("."))); } catch (Exception e) {} return pID1 - pID2; } }); //params.getLogger().info("pages to search: " + pages.length); for (File p : pages) { String pID = p.getName().substring(0, p.getName().lastIndexOf(".")); boolean add = true; java.util.regex.Matcher matcher = null; String transcriptionBody = ""; if (params.textPattern != null) { //params.getLogger().info("Searching Page: " + documentID + "; (" + pID + ")"); transcriptionBody = Page.getTranscription(Integer.parseInt(documentID), Integer.parseInt(pID), !"PUBLISHED".equals(userName) ? userName : null, null, false); if (transcriptionBody == null) transcriptionBody = ""; matcher = params.textPattern.matcher(transcriptionBody); add = matcher.find(); //params.getLogger().info("transcriptionBody.size: " + transcriptionBody.length() + "; matches: " + add); } if (add) { String pageText = " transcriptionBody.length()) end = transcriptionBody.length(); match += ""; String contextPre = HTTPUtils.canonize(transcriptionBody.substring(start, matcher.start())); match += contextPre; match += ""; match += ""; String contextPost = HTTPUtils.canonize(transcriptionBody.substring(matcher.end(), end)); match += contextPost; match += ""; String hit = HTTPUtils.canonize(transcriptionBody.substring(matcher.start(), matcher.end())); String verseAttribute = ""; int previousABIndex = transcriptionBody.lastIndexOf(" -1) { String verse = Transcription.getAttribute(transcriptionBody.substring(previousABIndex), "n"); if (verse != null) { vk = new VerseKey(verse); verseAttribute = " verse=\""+vk.getShortText()+"\""; } } match += ""; match += hit; match += ""; if (params.replace != null) { matcher.appendReplacement(transcriptionBodyUpdated, params.replace); String replace = HTTPUtils.canonize(transcriptionBodyUpdated.substring(matcher.start() - lastEnd)); String confirm = DigestUtils.md5Hex("$1$vmrcre$" + documentID + pID + contextPre + replace + contextPost); match += ""; match += replace; match += ""; if (params.confirmStrings.contains(confirm)) { transcriptionBodyToSave.append(transcriptionBodyUpdated); savePage = true; } else { transcriptionBodyToSave.append(transcriptionBody.substring(lastEnd, matcher.end())); } } match += ""; if (savePage) matchHeader += " replaced=\"true\""; if (verseRangeVK == null || vk == null) { pageText += matchHeader + ">" + match; } else { if (verseRangeVK.contains(vk)) { pageText += matchHeader + ">" + match; } //else params.getLogger().info("Not in verse range: " + vk.getOSISRef() + " !in " + verseRangeVK.getRangeText()); } lastEnd = matcher.end(); } while (matcher.find()); pageText += ""; matcher.appendTail(transcriptionBodyToSave); if (savePage) { if (committingUserName.length() < 1) { params.addError(-6, "Permission denied."); break; } // check if we're saving a site transcription boolean siteAdmin = params.getUser() != null && params.getUser().hasRole("Site Administrator", userName); // userName will be SiteName if (!isAdmin && "PUBLISHED".equals(userName)) { params.addError(-6, "Must be a VMR Administrator or Transcription Manager to publish global transcriptions."); break; } if (!isAdmin && !committingUserName.equals(userName) && !siteAdmin) { params.addError(-6, "Must be a VMR Administrator to edit another user's transcription."); break; } Document doc = Document.getDocument(Integer.parseInt(documentID)); doc.setTranscriptionPage(Integer.parseInt(pID), transcriptionBodyToSave.toString(), !"PUBLISHED".equals(userName) ? userName : null, true, false, false, committingUserName.length() > 0 ? committingUserName : null); pagesSaved = true; } } else pageText += "/>"; //params.getLogger().info("pageText: " + pageText); retVal.append(pageText); } } retVal.append(""); } else { retVal.append("/>"); } } catch (Exception e) { e.printStackTrace(); } } if (pagesSaved) VersionedRepo.push(); if (params.getErrors().size() == 0) { retVal.append(""); if ("json".equals(params.format) || "csv".equals(params.format)) { try { Map result = Serializer.fromXML(retVal.toString()); Serializer.output(response, out, params, "csv".equals(params.format) ? ((Map)result.get("documents")).get("document") : result, null); } catch (Exception e) { params.getLogger().error("ERROR! " + e); e.printStackTrace(); } return; } else out.print(retVal); return; } } else { params.format = "html"; } Serializer.reportErrors(request, response, out, params); %>