<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@ page trimDirectiveWhitespaces="true" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Document" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Transcription" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Page" %>
<%@ page import="org.crosswire.sword.keys.VerseKey" %>
<%@ page import="org.crosswire.sword.keys.ListKey" %>
<%@ page import="org.crosswire.utils.Utils" %>
<%@ page import="org.crosswire.utils.HTTPUtils" %>
<%@ page import="java.io.File" %>
<%@ page import="java.util.Date" %>
<%@ page import="java.util.Set" %>
<%@ page import="java.util.HashSet" %>
<%@ page import="java.util.Arrays" %>
<%@ page import="java.util.Collections" %>
<%@ page import="java.util.Map" %>
<%@ page import="java.util.TreeMap" %>
<%@ page import="java.util.HashMap" %>
<%@ page import="java.util.List" %>
<%@ page import="java.util.ArrayList" %>
<%@ page import="java.util.Comparator" %>
<%@ page import="java.io.FilenameFilter" %>
<%@ page import="java.io.FileFilter" %>
<%@ page import="org.crosswire.repo.VersionedRepo" %>
<%@ page import="javax.validation.constraints.Pattern" %>
<%@ page import="org.crosswire.webtools.annotation.*" %>
<%@ page import="org.crosswire.webtools.*" %>
<%@ page import="org.apache.commons.codec.digest.DigestUtils" %>
<%!
// Language-code -> CSS font-family override for scripts the default font cannot render
// (Coptic dialects -> Antinoou, Syriac -> Estrangelo Edessa, Church Slavonic -> Bukyvede).
static HashMap specialFonts = new HashMap();
// Language codes whose transcriptions are rendered right-to-left.
static HashSet rightJustify = new HashSet();
static {
specialFonts.put("bo", "Antinoou, AntinoouWeb");
specialFonts.put("sa", "Antinoou, AntinoouWeb");
specialFonts.put("fa", "Antinoou, AntinoouWeb");
specialFonts.put("mae", "Antinoou, AntinoouWeb");
specialFonts.put("ly", "Antinoou, AntinoouWeb");
specialFonts.put("cw", "Antinoou, AntinoouWeb");
specialFonts.put("syc", "Estrangelo Edessa, EstreWeb");
specialFonts.put("chu", "BukyvedeWeb");
rightJustify.add("syc");
rightJustify.add("he");
rightJustify.add("arb");
}
// Number of characters of context captured on each side of a text match excerpt.
static final int MATCH_EXCERPT_CONTEXT = 40;
// NOTE(review): the line below is corrupted — an XML-stripping pass appears to have
// eaten everything between the opening quote of the lbPattern regex (presumably a
// "<lb ..." TEI line-break pattern) and the declaration header of the parameters
// class (presumably "public static class MyParameters extends Parameters {").
// Recover the original from version control before editing; preserved byte-for-byte here.
static final java.util.regex.Pattern lbPattern = java.util.regex.Pattern.compile(" {
// --- derived state, populated by customValidation() ---
// docID/pageID range bounds; -1 means "not specified".
protected int minDocID = -1;
protected int maxDocID = -1;
protected int minPageID = -1;
protected int maxPageID = -1;
// md5 'confirm' tokens supplied by the caller authorizing specific replacements.
protected Set confirmStrings = new HashSet();
// compiled form of {text}; null when no text filter was requested.
protected java.util.regex.Pattern textPattern = null;
// --- request parameters (bound from the HTTP request by the Parameters framework) ---
@Description(value = "Limit results to user who owns the transcription. Use \"ENDORSEDPROJECTS\" to search all transcriptions owned by projects endorsed in sysconfic.properties: TranscriptionIncludeProjects", example = "jsmith")
public String userName = null;
@Description(value = "Limit results to a docID. Can also be a range", example = "91035-91037")
@Pattern(regexp = "^[0-9]*(-[0-9]*)?$", message = "Please give a docID or docID range with min and max values separated by a - (dash).")
public String docID = null;
@Description(value = "Limit results to a pageID. Can also be a range", example = "10-550")
@Pattern(regexp = "^[0-9]*(-[0-9]*)?$", message = "Please give a pageID or pageID range with min and max values separated by a - (dash).")
public String pageID = null;
@Description(value = "Limit results to a indexContent", example = "Ps.90.1")
public String indexContent = null;
// NOTE(review): the 'example' strings in the next two annotations look truncated
// (their XML-like tag content appears to have been stripped) — confirm against VCS.
@Description(value = "Limit results to pages which contain a regular expression text pattern in the TEI data", example = "(]*>)\\.()")
public String text;
@Description(value = "Same as {text} but will be converted from iso8859-1 to utf-8", example = "(]*>)\\.()")
public String textConvert;
@Description(value = "Replacement string to substitute for matches to above {text} parameter. Parenthesized submatches can be specified with $n, e.g., $1 to represent the first parenthesis match from the {term} regular expression.", example = "$1;$2")
public String replace;
@Description(value = "Same as {replace} but will be converted from iso8859-1 to utf-8", example = "$1;$2")
public String replaceConvert;
@Description(value = "Perform an update of the transcription repository at the point specified by the confirm string specified. This string is obtained from the 'confirm' attribute on the replace element of a match. Can be specified multiple times", example = "d1b6f246c1c34ec2a8c9a81dcd84aa40")
public String confirm;
@Description(value = "detail of results: document, page", defaultValue = "document")
public String detail;
@Override
protected void afterLoad() {
// No post-load processing; all derived state is computed in customValidation().
}
/**
 * Derives internal search state from the raw request parameters and reports
 * validation problems via addError(). Responsibilities, in order:
 * <ul>
 *  <li>parse docID / pageID "min[-max]" ranges into min/max fields</li>
 *  <li>when a docID plus indexContent (but no pageID) are given and exactly one
 *      document matches, widen the page range to cover all of its pages</li>
 *  <li>require at least one of docID / userName / detail</li>
 *  <li>convert the iso8859-1 variants (textConvert / replaceConvert) to UTF-8</li>
 *  <li>compile the {text} regular expression</li>
 *  <li>collect every repeated 'confirm' parameter</li>
 *  <li>default {detail} to "document"</li>
 * </ul>
 */
@Override
protected void customValidation() {
    // The @Pattern regexes permit empty segments (e.g. "" or "-42"), which used to
    // escape as an uncaught NumberFormatException; report a clean error instead.
    if (docID != null && docID.length() > 0) {
        String range[] = docID.split("-");
        try {
            minDocID = Integer.parseInt(range[0]);
            maxDocID = Integer.parseInt(range[range.length - 1]);
        }
        catch (NumberFormatException e) {
            addError(-1, "Please give a docID or docID range with min and max values separated by a - (dash).");
        }
    }
    if (pageID != null && pageID.length() > 0) {
        String range[] = pageID.split("-");
        try {
            minPageID = Integer.parseInt(range[0]);
            maxPageID = Integer.parseInt(range[range.length - 1]);
        }
        catch (NumberFormatException e) {
            addError(-1, "Please give a pageID or pageID range with min and max values separated by a - (dash).");
        }
    }
    // docID given without a pageID: if indexContent pins down exactly one document,
    // expand the page range to span all of that document's pages.
    if (minDocID != -1 && minPageID == -1 && indexContent != null && indexContent.trim().length() > 0) {
        Document.SearchFilter searchFilter = new Document.SearchFilter();
        searchFilter.docID = minDocID;
        searchFilter.biblicalContent = indexContent;
        Document docs[] = Document.searchDocuments(true, searchFilter);
        if (docs != null && docs.length == 1) {
            for (Page p : docs[0].getPages()) {
                if (minPageID == -1 || p.getPageID() < minPageID) minPageID = p.getPageID();
                if (maxPageID == -1 || p.getPageID() > maxPageID) maxPageID = p.getPageID();
            }
        }
    }
    if (userName == null && minDocID == -1 && detail == null) {
        addError(-1, "Must specify one of: docID, or userName");
    }
    if (textConvert != null) {
        text = Transcription.assureUnicode(textConvert);
    }
    if (text != null) {
        // {text} comes straight from the user; report a malformed regex instead of
        // letting PatternSyntaxException escape the validation phase.
        try {
            textPattern = java.util.regex.Pattern.compile(text);
        }
        catch (java.util.regex.PatternSyntaxException e) {
            addError(-1, "Invalid regular expression for text parameter: " + e.getMessage());
        }
    }
    if (replaceConvert != null) {
        replace = Transcription.assureUnicode(replaceConvert);
    }
    // 'confirm' may be supplied multiple times; collect every value.
    String confirms[] = request.getParameterValues("confirm");
    if (confirms != null) confirmStrings.addAll(Arrays.asList(confirms));
    if (detail == null) detail = "document";
}
@Override
protected boolean permissionValidation(HttpServletRequest request, HttpServletResponse response) {
    // Every request is allowed through here; fine-grained permission checks
    // (admin roles, transcription ownership) are applied later, at save time.
    return true;
}
}
%>
<%
// === Transcription search / replace endpoint ===
// NOTE(review): this file appears damaged by an XML-tag-stripping pass — many
// string literals that originally contained XML markup (retVal.append(...) open/close
// tags, matchHeader construction, lastIndexOf("<ab"...) probes) are now empty or
// truncated mid-expression. Code is preserved byte-for-byte below; comments flag the
// visibly corrupted spots. Recover the originals from version control before editing.
MyParameters params = new MyParameters().loadFromRequest(request, response);
if (params.getErrors().size() == 0) {
// Site-configurable display name for the special "PUBLISHED" pseudo-user.
String transcriptionPublishedName = Utils.getSysConfig(session).getProperty("TranscriptionPublishedName", "PUBLISHED");
String defaultFontSpec = Utils.getSysConfig(session).getProperty("DefaultFontSpec", "GentiumPlus");
// User performing any repository writes; "" means anonymous (writes are refused later).
String committingUserName = params.getUser() != null ? params.getUser().getUserName() : null;
if (committingUserName == null) committingUserName = "";
List docs = null;
response.setContentType("text/xml");
boolean pagesSaved = false;
// Optional verse-range filter parsed from {indexContent}.
ListKey verseRangeVK = null;
if (params.indexContent != null) {
verseRangeVK = new VerseKey().ParseVerseList(params.indexContent, "Mat.1.1", true);
}
// ---------------------------------------------
String searchString = "";
// When userName == "ENDORSEDPROJECTS", restrict hits to this configured set of project users.
Set endorsedProjects = null;
if ("ENDORSEDPROJECTS".equals(params.userName)) {
String transcriptionIncludeProjects = Utils.getSysConfig(session).getProperty("TranscriptionIncludeProjects", "");
endorsedProjects = new HashSet(Arrays.asList(transcriptionIncludeProjects.split("\\|")));
params.getLogger().info(endorsedProjects);
}
// let's check to see if we're a privileged user
boolean isAdmin = params.getUser() != null && (
params.getUser().hasRole("VMR Administrator")
|| params.getUser().hasRole("Transcription Manager")
|| params.getUser().hasRole("Transcription Publisher"));
// searching for transcriptions of a particular docID
if (params.minDocID != -1) {
// searching for transcriptions of a particular docID+pageID
Document doc = Document.getDocument(params.minDocID);
if (doc != null) {
// Collect candidate transcription file paths (repo-relative: docID/initial/user/pageID.xml).
List paths = new ArrayList();
// TODO: fold endorsedProjects logic into the else below, but for now this is fast and works
if (endorsedProjects != null) {
for (String user : endorsedProjects) {
if (doc.isTranscriptionPageExists(params.minPageID, user)) {
paths.add(new File(Integer.toString(params.minDocID)+"/initial/"+user+"/"+params.minPageID+".xml"));
}
}
}
else {
String path = Integer.toString(params.minDocID);
File potential = null;
// check for PUBLISHED transcription
// yeah, so, since we get the username below from the 'initial/ portion, we hack this :(
// NB: we are not using 'path' in the check for exists here
if (params.userName == null || "PUBLISHED".equals(params.userName)) {
if (params.minPageID != -1 && params.maxPageID == -1) {
// Single page requested: add its PUBLISHED path if the page exists.
potential = new File(path + "/initial/PUBLISHED/" + Integer.toString(params.minPageID) + ".xml");
if (doc.isTranscriptionPageExists(params.minPageID, null)) {
paths.add(potential);
}
}
else {
// Page range (or all pages): scan repo for page xml files within range.
for (File p : VersionedRepo.search(path + "/*.xml")) {
try {
int pID = Integer.parseInt(p.getName().substring(0, p.getName().indexOf(".")));
if (pID >= params.minPageID && (pID <= params.maxPageID || params.maxPageID == -1)) {
potential = new File(path + "/initial/PUBLISHED/" + Integer.toString(pID) + ".xml");
paths.add(potential);
}
}
catch (Exception e) { e.printStackTrace(); }
}
}
}
// end check for PUBLISHED
path += "/initial/";
// check for user transcriptions
File[] users = null;
if (params.userName != null) {
users = new File[1];
users[0] = new File(params.userName);
}
else users = VersionedRepo.getFolders(path);
//params.getLogger().info("user: " + users);
for (File f : users) {
if (params.minPageID != -1) {
potential = new File(f, Integer.toString(params.minPageID) + ".xml");
if (potential.exists()) {
paths.add(potential);
}
}
else {
for (File p : VersionedRepo.search(path + f.getName() + "/*.xml")) {
try {
// NOTE(review): pID is parsed but unused here — presumably a validity filter
// (skip non-numeric filenames via the catch); confirm intent.
int pID = Integer.parseInt(p.getName().substring(0, p.getName().indexOf(".")));
paths.add(p);
}
catch (Exception e) { e.printStackTrace(); }
}
}
}
}
docs = paths;
}
}
else if (params.userName != null) {
searchString = "PUBLISHED".equals(params.userName) ? "*/*.xml" : ("initial/"+params.userName);
}
else searchString = "initial/*/*.xml";
StringBuffer retVal = new StringBuffer();
if (docs == null) {
docs = new ArrayList(); Collections.addAll(docs, VersionedRepo.search(searchString));
}
// Sentinel entry so the loop below flushes the last open document element.
docs.add(new File("EOF"));
// NOTE(review): empty literal below — the original XML root open tag
// (likely "<documents>") appears to have been stripped.
retVal.append("");
// Tracks the document/user grouping currently being emitted (page-detail mode).
String inDocID = null;
String inDocIDUserName = null;
Map inDocIDPages = null;
//params.getLogger().info("Searching docs.length: : " + docs.size());
for (File f : docs) {
//params.getLogger().info("Searching file: " + f.getAbsolutePath());
try {
String documentID = null;
String pageCount = null;
String userName = null;
if (!"EOF".equals(f.getName())) {
if (f.isDirectory()) {
// Directory hit: a whole user folder (docID/initial/<user>); count its pages.
documentID = f.getParentFile().getParentFile().getName();
pageCount = Integer.toString(f.list(new FilenameFilter() {public boolean accept(File dir, String name) { return name.endsWith(".xml"); } }).length);
userName = params.userName;
}
else {
pageCount = "1";
// Derive docID/owner from the path shape: docID/initial/<user>/<page>.xml
// vs. docID/<page>.xml (implicitly PUBLISHED).
if ("initial".equals(f.getParentFile().getParentFile().getName())) {
documentID = f.getParentFile().getParentFile().getParentFile().getName();
userName = f.getParentFile().getName();
// because */*.xml search string recurses and finds user transcriptions
// we should probably make an option to search for only published transcriptions
if ("PUBLISHED".equals(params.userName) && !"PUBLISHED".equals(userName)) continue;
}
else {
documentID = f.getParentFile().getName();
userName = "PUBLISHED";
}
if (endorsedProjects != null && !endorsedProjects.contains(userName)) {
continue;
}
}
}
// Flush the previous document grouping when the doc/user changes (or at EOF).
// NOTE(review): the empty appends below originally carried XML open/close tags
// (stripped from the source) — confirm against VCS.
if (inDocID != null && (!inDocID.equals(documentID) || !inDocIDUserName.equals(userName))) {
retVal.append("");
for (String idip : inDocIDPages.values()) { retVal.append(idip); }
retVal.append("");
}
else retVal.append("/>");
inDocID = null;
inDocIDUserName = null;
inDocIDPages = null;
}
if (inDocID == null && !f.isDirectory()) {
inDocID = documentID;
inDocIDUserName = userName;
inDocIDPages = new TreeMap();
}
if ("EOF".equals(f.getName())) continue;
if (inDocID != null) {
// --- page-grouped branch: search one page file, emit <page> text into inDocIDPages ---
String pID = f.getName().substring(0, f.getName().lastIndexOf("."));
boolean add = true;
java.util.regex.Matcher matcher = null;
String transcriptionBody = "";
if (params.textPattern != null) {
transcriptionBody = Page.getTranscription(Integer.parseInt(documentID), Integer.parseInt(pID), !"PUBLISHED".equals(userName) ? userName : null, null, false);
if (transcriptionBody == null) transcriptionBody = "";
matcher = params.textPattern.matcher(transcriptionBody);
add = matcher.find();
//params.getLogger().info("transcriptionBody.size: " + transcriptionBody.length() + "; matches: " + add);
}
if (add) {
String pageText = "";
int lastEnd = 0;
StringBuffer transcriptionBodyToSave = new StringBuffer();
boolean savePage = false;
// For each regex match: build a match excerpt (context + hit), optionally
// apply {replace}, and commit only when the caller supplied the md5 'confirm' token.
do {
String font = defaultFontSpec;
// do we really want to lookup the doc for each result and get its language to return proper font?
// probably, but I am not sure the speed detriment for ALL searches with, sway 40,000 results
// so for now, let's just return defaultFontSpec
/*
Document doc = Document.getDocument(Integer.parseInt(documentID));
String lang = doc.getLanguage();
if (specialFonts.containsKey(lang)) font = specialFonts.get(lang);
*/
StringBuffer transcriptionBodyUpdated = new StringBuffer();
int start = matcher.start();
int end = matcher.end();
int lineNo = 0;
// Count line breaks before the match to report a line number.
java.util.regex.Matcher lbMatcher = lbPattern.matcher(transcriptionBody.substring(0, end));
while (lbMatcher.find()) ++lineNo;
// NOTE(review): the line below is corrupted — the matchHeader XML attribute string,
// the MATCH_EXCERPT_CONTEXT window math for 'start'/'end', and (apparently) the
// declarations of 'match' and 'vk' were stripped mid-expression.
String matchHeader = " transcriptionBody.length()) end = transcriptionBody.length();
match += "";
String contextPre = HTTPUtils.canonize(transcriptionBody.substring(start, matcher.start()));
match += contextPre;
match += "";
match += "";
String contextPost = HTTPUtils.canonize(transcriptionBody.substring(matcher.end(), end));
match += contextPost;
match += "";
String verseAttribute = "";
// NOTE(review): corrupted — originally a lastIndexOf for the preceding "<ab"
// (verse container) element, merged here with its "> -1" guard.
int previousABIndex = transcriptionBody.lastIndexOf(" -1) {
String verse = Transcription.getAttribute(transcriptionBody.substring(previousABIndex), "n");
if (verse != null) {
vk = new VerseKey(verse);
verseAttribute = " verse=\""+vk.getShortText()+"\"";
}
}
match += "";
match += HTTPUtils.canonize(transcriptionBody.substring(matcher.start(), matcher.end()));
match += "";
if (params.replace != null) {
matcher.appendReplacement(transcriptionBodyUpdated, params.replace);
String replace = HTTPUtils.canonize(transcriptionBodyUpdated.substring(matcher.start() - lastEnd));
// The confirm token binds doc, page, and surrounding context so a stale
// confirmation cannot be replayed against changed text.
String confirm = DigestUtils.md5Hex("$1$vmrcre$" + documentID + pID + contextPre + replace + contextPost);
match += "";
match += replace;
match += "";
if (params.confirmStrings.contains(confirm)) {
transcriptionBodyToSave.append(transcriptionBodyUpdated);
savePage = true;
}
else {
transcriptionBodyToSave.append(transcriptionBody.substring(lastEnd, matcher.end()));
}
}
match += " ";
if (savePage) matchHeader += " replaced=\"true\"";
// Apply the optional verse-range filter before emitting the match.
if (verseRangeVK == null || vk == null) {
pageText += matchHeader + ">" + match;
}
else {
if (verseRangeVK.contains(vk)) {
pageText += matchHeader + ">" + match;
}
// else params.getLogger().info("Not in verse range: " + vk.getOSISRef() + " !in " + verseRangeVK.getRangeText());
}
lastEnd = matcher.end();
} while (matcher.find());
pageText += "";
matcher.appendTail(transcriptionBodyToSave);
if (savePage) {
// Permission gates: anonymous users cannot save; publishing to PUBLISHED and
// editing another user's transcription require elevated roles.
if (committingUserName.length() < 1) {
params.addError(-6, "Permission denied.");
break;
}
// check if we're saving a site transcription
boolean siteAdmin = params.getUser() != null && params.getUser().hasRole("Site Administrator", userName); // userName will be SiteName
if (!isAdmin && "PUBLISHED".equals(userName)) {
params.addError(-6, "Must be a VMR Administrator or Transcription Manager to publish global transcriptions.");
break;
}
if (!isAdmin && !committingUserName.equals(userName) && !siteAdmin) {
params.addError(-6, "Must be a VMR Administrator to edit another user's transcription.");
break;
}
Document doc = Document.getDocument(Integer.parseInt(documentID));
doc.setTranscriptionPage(Integer.parseInt(pID), transcriptionBodyToSave.toString(), !"PUBLISHED".equals(userName) ? userName : null, true, false, false, committingUserName.length() > 0 ? committingUserName : null);
pagesSaved = true;
}
}
else pageText += "/>";
//params.getLogger().info("pageText: " + pageText);
inDocIDPages.put(Integer.parseInt(pID), pageText);
//params.getLogger().info("Adding page. count: " + inDocIDPages.size());
}
continue;
}
// --- document-level branch: emit a document element and scan its pages ---
// NOTE(review): empty literal below — the original document open tag was stripped.
retVal.append("");
File pages[] = null;
if (f.isDirectory()) {
pages = f.listFiles(new FileFilter() { public boolean accept(File path) { return path.getName().endsWith(".xml"); } });
}
else {
pages = new File[1];
pages[0] = f;
}
// Sort pages numerically by pageID (filename without extension).
Arrays.sort(pages, new Comparator() {
public int compare(File o1, File o2) {
Integer pID1 = 0; try { pID1 = Integer.parseInt(o1.getName().substring(0, o1.getName().lastIndexOf("."))); } catch (Exception e) {}
Integer pID2 = 0; try { pID2 = Integer.parseInt(o2.getName().substring(0, o2.getName().lastIndexOf("."))); } catch (Exception e) {}
return pID1 - pID2;
}
});
//params.getLogger().info("pages to search: " + pages.length);
// This loop mirrors the page-grouped branch above (see its comments); the two
// should eventually be factored into a shared helper.
for (File p : pages) {
String pID = p.getName().substring(0, p.getName().lastIndexOf("."));
boolean add = true;
java.util.regex.Matcher matcher = null;
String transcriptionBody = "";
if (params.textPattern != null) {
//params.getLogger().info("Searching Page: " + documentID + "; (" + pID + ")");
transcriptionBody = Page.getTranscription(Integer.parseInt(documentID), Integer.parseInt(pID), !"PUBLISHED".equals(userName) ? userName : null, null, false);
if (transcriptionBody == null) transcriptionBody = "";
matcher = params.textPattern.matcher(transcriptionBody);
add = matcher.find();
//params.getLogger().info("transcriptionBody.size: " + transcriptionBody.length() + "; matches: " + add);
}
if (add) {
String pageText = "";
int lastEnd = 0;
StringBuffer transcriptionBodyToSave = new StringBuffer();
boolean savePage = false;
do {
String font = defaultFontSpec;
// do we really want to lookup the doc for each result and get its language to return proper font?
// probably, but I am not sure the speed detriment for ALL searches with, sway 40,000 results
// if slow, let's just return defaultFontSpec
/*
Document doc = Document.getDocument(Integer.parseInt(documentID));
String lang = doc.getLanguage();
if (specialFonts.containsKey(lang)) font = specialFonts.get(lang);
*/
StringBuffer transcriptionBodyUpdated = new StringBuffer();
int start = matcher.start();
int end = matcher.end();
int lineNo = 0;
java.util.regex.Matcher lbMatcher = lbPattern.matcher(transcriptionBody.substring(0, end));
while (lbMatcher.find()) ++lineNo;
// NOTE(review): corrupted line — same stripped matchHeader/excerpt-window
// construction as in the page-grouped branch above.
String matchHeader = " transcriptionBody.length()) end = transcriptionBody.length();
match += "";
String contextPre = HTTPUtils.canonize(transcriptionBody.substring(start, matcher.start()));
match += contextPre;
match += "";
match += "";
String contextPost = HTTPUtils.canonize(transcriptionBody.substring(matcher.end(), end));
match += contextPost;
match += "";
String hit = HTTPUtils.canonize(transcriptionBody.substring(matcher.start(), matcher.end()));
String verseAttribute = "";
// NOTE(review): corrupted — stripped lastIndexOf("<ab"...) probe, as above.
int previousABIndex = transcriptionBody.lastIndexOf(" -1) {
String verse = Transcription.getAttribute(transcriptionBody.substring(previousABIndex), "n");
if (verse != null) {
vk = new VerseKey(verse);
verseAttribute = " verse=\""+vk.getShortText()+"\"";
}
}
match += "";
match += hit;
match += "";
if (params.replace != null) {
matcher.appendReplacement(transcriptionBodyUpdated, params.replace);
String replace = HTTPUtils.canonize(transcriptionBodyUpdated.substring(matcher.start() - lastEnd));
String confirm = DigestUtils.md5Hex("$1$vmrcre$" + documentID + pID + contextPre + replace + contextPost);
match += "";
match += replace;
match += "";
if (params.confirmStrings.contains(confirm)) {
transcriptionBodyToSave.append(transcriptionBodyUpdated);
savePage = true;
}
else {
transcriptionBodyToSave.append(transcriptionBody.substring(lastEnd, matcher.end()));
}
}
match += " ";
if (savePage) matchHeader += " replaced=\"true\"";
if (verseRangeVK == null || vk == null) {
pageText += matchHeader + ">" + match;
}
else {
if (verseRangeVK.contains(vk)) {
pageText += matchHeader + ">" + match;
}
//else params.getLogger().info("Not in verse range: " + vk.getOSISRef() + " !in " + verseRangeVK.getRangeText());
}
lastEnd = matcher.end();
} while (matcher.find());
pageText += "";
matcher.appendTail(transcriptionBodyToSave);
if (savePage) {
if (committingUserName.length() < 1) {
params.addError(-6, "Permission denied.");
break;
}
// check if we're saving a site transcription
boolean siteAdmin = params.getUser() != null && params.getUser().hasRole("Site Administrator", userName); // userName will be SiteName
if (!isAdmin && "PUBLISHED".equals(userName)) {
params.addError(-6, "Must be a VMR Administrator or Transcription Manager to publish global transcriptions.");
break;
}
if (!isAdmin && !committingUserName.equals(userName) && !siteAdmin) {
params.addError(-6, "Must be a VMR Administrator to edit another user's transcription.");
break;
}
Document doc = Document.getDocument(Integer.parseInt(documentID));
doc.setTranscriptionPage(Integer.parseInt(pID), transcriptionBodyToSave.toString(), !"PUBLISHED".equals(userName) ? userName : null, true, false, false, committingUserName.length() > 0 ? committingUserName : null);
pagesSaved = true;
}
}
else pageText += "/>";
//params.getLogger().info("pageText: " + pageText);
retVal.append(pageText);
}
}
// NOTE(review): empty literals below — stripped document/documents close tags.
retVal.append("");
}
else {
retVal.append("/>");
}
}
catch (Exception e) { e.printStackTrace(); }
}
// Push all accumulated repository commits in one batch.
if (pagesSaved) VersionedRepo.push();
if (params.getErrors().size() == 0) {
retVal.append("");
// Re-serialize the XML result to JSON/CSV when requested; otherwise emit raw XML.
if ("json".equals(params.format) || "csv".equals(params.format)) {
try {
Map result = Serializer.fromXML(retVal.toString());
Serializer.output(response, out, params, "csv".equals(params.format) ? ((Map)result.get("documents")).get("document") : result, null);
}
catch (Exception e) { params.getLogger().error("ERROR! " + e); e.printStackTrace(); }
return;
}
else out.print(retVal);
return;
}
}
else {
// Validation failed: fall through to the HTML error report.
params.format = "html";
}
Serializer.reportErrors(request, response, out, params);
%>