<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@ page trimDirectiveWhitespaces="true" %>
<%@ page import="org.crosswire.utils.Utils" %>
<%@ page import="org.crosswire.utils.Sessions" %>
<%@ page import="org.crosswire.xml.XMLBlock" %>
<%@ page import="org.crosswire.xml.XMLTag" %>
<%@ page import="org.crosswire.repo.VersionedRepo" %>
<%@ page import="org.crosswire.utils.HTTPUtils" %>
<%@ page import="org.crosswire.sword.keys.VerseKey" %>
<%@ page import="org.crosswire.sword.keys.ListKey" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Transcription" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Document" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Page" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Transcription.Convert" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Transcription.Convert.VerseTranscription" %>
<%@ page import="java.util.Map" %>
<%@ page import="java.util.Vector" %>
<%@ page import="org.apache.log4j.Logger" %>
<%@ page import="org.apache.commons.fileupload.*" %>
<%@ page import="java.util.List" %>
<%
// log4j logger for this page, looked up under the name "transcript/splitpages".
Logger logger = Logger.getLogger("transcript/splitpages");
// Wraps the request handling that follows.
// NOTE(review): the matching catch/finally lies outside this section of the file.
try {
//dumpCallInfo(request, logger);
// ---- Read the plain (non-multipart) request parameters ----------------------

// Session hash for the calling request, as reported by the Sessions helper.
String sessionHash = Sessions.getSessionHash(request);

// "text": transcription passed inline; null when not supplied as a request parameter.
String transcript = request.getParameter("text");
// "serverPath": optional path of a transcription file to load on the server side.
String serverPath = request.getParameter("serverPath");

// Boolean switches: only the exact string "true" enables them; a missing
// parameter or any other value leaves them false.
boolean plainText = "true".equals(request.getParameter("plainText"));
boolean symbolGreek = "true".equals(request.getParameter("symbolGreek"));
boolean ignoreUnderdot = "true".equals(request.getParameter("ignoreUnderdot"));

// "format": requested output format; may be null.
String format = request.getParameter("format");

// "docID": optional; stays -1 when the parameter is missing or not an integer.
int docID = -1;
String docIDText = request.getParameter("docID");
if (docIDText != null) {
    try {
        docID = Integer.parseInt(docIDText);
    } catch (NumberFormatException ignored) {
        // Deliberately ignored: docID is optional, so a malformed value
        // simply leaves the -1 default in place.
    }
}

// An inline transcript that is not symbol Greek is passed through
// Transcription.assureUnicode before any further processing.
if (transcript != null && !symbolGreek) {
    transcript = Transcription.assureUnicode(transcript);
}
// See if we're receiving an upload of a transcription.
// When no inline "text" parameter was supplied, try to parse the request as a
// multipart upload: form fields override the option values read so far, and the
// uploaded file part becomes the transcript buffer.
if (transcript == null) {
    try {
        DiskFileUpload fu = new DiskFileUpload();
        // maximum size before a FileUploadException will be thrown
        fu.setSizeMax(99000000);
        // maximum size that will be stored in memory
        fu.setSizeThreshold(4096);
        // the location for saving data that is larger than getSizeThreshold()
        // getRealPath may return null (e.g. when the webapp is not served from the
        // file system); in that case skip the call instead of passing null on.
        // NOTE(review): this relies on the upload library having a usable default
        // repository location - confirm for the library version in use.
        String uploadTmpPath = session.getServletContext().getRealPath("tmp/");
        if (uploadTmpPath != null) {
            fu.setRepositoryPath(uploadTmpPath);
        }

        List fileItems = fu.parseRequest(request);
        //logger.info("transcript is null, checking file upload. fileItems, size: " + (fileItems != null ? fileItems.size() : 0));

        // Raw bytes of the uploaded file. Decoding is postponed until every form
        // field has been seen, because the charset depends on symbolGreek and that
        // field may arrive after the file part in the multipart body.
        byte[] uploadBytes = null;

        for (int i = 0; i < fileItems.size(); ++i) {
            FileItem fi = (FileItem) fileItems.get(i);
            if (fi.isFormField()) {
                //logger.info("processing form field: " + fi.getFieldName() + " = " +fi.getString());
                String fieldName = fi.getFieldName();
                if ("docID".equals(fieldName)) {
                    try {
                        docID = Integer.parseInt(fi.getString());
                    } catch (NumberFormatException ignored) {
                        // Deliberately ignored: a malformed docID keeps the previous value.
                    }
                } else if ("plainText".equals(fieldName)) {
                    plainText = "true".equals(fi.getString());
                } else if ("symbolGreek".equals(fieldName)) {
                    symbolGreek = "true".equals(fi.getString());
                } else if ("ignoreUnderdot".equals(fieldName)) {
                    ignoreUnderdot = "true".equals(fi.getString());
                } else if ("format".equals(fieldName)) {
                    format = fi.getString();
                }
            } else {
                // As before, when several file parts are sent the last one wins.
                uploadBytes = fi.get();
            }
        }

        if (uploadBytes != null) {
            // Symbol Greek uploads are decoded as ISO-8859-1, everything else as UTF-8.
            transcript = new String(uploadBytes, symbolGreek ? "iso8859-1" : "UTF-8");
            //logger.info("processing upload data, size: " + transcript.length());
        }
    } catch (Exception e) {
        // Best effort: a request that cannot be parsed as an upload is only logged
        // at debug level and leaves the transcript buffer null.
        logger.debug(e);
    }
}
//logger.info("transcript/splitpages(serverPath: " + serverPath + "; plainText: " + plainText + "; symbolGreek: " + symbolGreek + "; ignoreUnderdot: " + ignoreUnderdot + "; format: " + format + "; transcript, size: " + (transcript != null ? transcript.length() : -1));
Transcription transcription = null;

// if we have a server path specified, then we will load our transcription from the server path
//
// serverPath comes straight from the request, so it is only honoured when it
// starts with "/data/" AND contains no ".." segment; without the second check a
// value such as "/data/../<other file>" would pass the prefix test while
// pointing outside /data/.
boolean serverPathUsable = serverPath != null && serverPath.startsWith("/data/");
if (serverPathUsable) {
    String[] serverPathSegments = serverPath.split("/");
    for (int s = 0; s < serverPathSegments.length; ++s) {
        if ("..".equals(serverPathSegments[s])) {
            serverPathUsable = false;
            logger.warn("transcript/splitpages: rejected serverPath containing a '..' segment");
            break;
        }
    }
}

if (serverPathUsable) {
    if (plainText) {
        // plain text needs more processing and we have a Transcription static to do all of that
        transcription = Transcription.fromPlainTextFile(serverPath, ignoreUnderdot, !symbolGreek);
    } else {
        // if we are already XML, then just read the file from the server into the transcript buffer
        // as if it was passed directly to us
        transcript = Transcription.readFile(serverPath);
    }
}
// if we have a transcript buffer, we need to be sure it has a header.
// NOTE(review): as written this statement can never change the buffer:
// String.startsWith("") is true for every string, so the negated test is always
// false, and the value that would be prepended is the empty string anyway.
// Both literals look like they lost their content (presumably the header text)
// - confirm the intended header and restore it in both places.
if (transcript != null && !symbolGreek && !transcript.trim().startsWith("")) transcript = "" + transcript;
//logger.info("transcript/splitpages: transcript.length(): " + (transcript != null ? transcript.length() : 0));
// Build the Transcription from the in-memory buffer, but only when no
// Transcription object exists yet. Plain-text input goes through
// Transcription.fromPlainText; anything else is handed to the constructor as is.
if (transcription == null && transcript != null) {
    transcription = plainText
        ? Transcription.fromPlainText(transcript, ignoreUnderdot, !symbolGreek)
        : new Transcription(transcript);
}
// Respond with the result only when a transcription could be built from one of
// the inputs handled above.
if (transcription != null) {
//logger.info("transcript/splitpages: transcription.getPages().size(): " + transcription.getPages().size());
// The response produced in this branch is declared as XML.
response.setContentType("text/xml");
%>
Split transcription XML into pages
text | text of transcription |
docID | docID, (optional) to help with context |
format | (xml), pagetext |
plainText | (false) or true, whether the specified input is plaintext or not |
symbolGreek | (false) or true, whether the specified input is symbol greek instead of Unicode |
ignoreUnderdot | (false) or true, whether or not to specially process the underdot as unclear |