<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%> <%@ page trimDirectiveWhitespaces="true" %> <%@ page import="org.crosswire.utils.Utils" %> <%@ page import="org.crosswire.utils.Sessions" %> <%@ page import="org.crosswire.xml.XMLBlock" %> <%@ page import="org.crosswire.xml.XMLTag" %> <%@ page import="org.crosswire.repo.VersionedRepo" %> <%@ page import="org.crosswire.utils.HTTPUtils" %> <%@ page import="org.crosswire.sword.keys.VerseKey" %> <%@ page import="org.crosswire.sword.keys.ListKey" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Transcription" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Document" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Page" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Transcription.Convert" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Transcription.Convert.VerseTranscription" %> <%@ page import="java.util.Map" %> <%@ page import="java.util.Vector" %> <%@ page import="org.apache.log4j.Logger" %> <%@ page import="org.apache.commons.fileupload.*" %> <%@ page import="java.util.List" %> <% /*
 * transcript/splitpages: builds a Transcription from the request and writes it back
 * page by page with content type text/xml.
 *
 * Input, in the order the code tries it:
 *  - request parameters text, serverPath, plainText, symbolGreek, ignoreUnderdot,
 *    format and docID (docID stays -1 when it does not parse as an int); a non-null
 *    text is passed through Transcription.assureUnicode unless symbolGreek is set;
 *  - when text is absent, the request is parsed as a file upload with DiskFileUpload
 *    (size limit 99000000, in-memory threshold 4096, spill directory tmp/ under the
 *    servlet context); its form fields docID, plainText, symbolGreek, ignoreUnderdot
 *    and format replace the values above, and the uploaded bytes become the
 *    transcript, decoded as iso8859-1 when symbolGreek is set and as UTF-8 otherwise;
 *  - when serverPath starts with /data/, the transcription is loaded from that path:
 *    Transcription.readFile when plainText is false, otherwise
 *    Transcription.fromPlainTextFile.
 *
 * Output, once a Transcription exists: the canonized head, then one entry per string
 * returned by getPages(), then the canonized tail, followed by return. Per page the
 * code gathers verse references (osisID attribute, falling back to n) joined with ';',
 * then in the "get folio info" step reads n, xml:id, type, osisID and pageID from a
 * tag: a non-empty osisID there replaces the gathered references, a parsable pageID
 * attribute overrides the value taken from xml:id, and type "page" makes the n value a
 * page number instead of a folio. The page text is passed through HTTPUtils.canonize
 * only when format is "pagetext".
 * When no Transcription could be built there is no early return, so the static
 * template text that follows the scriptlets is served instead.
 *
 * Failures: unparsable docID / page id values are silently ignored (the -1 default is
 * kept), an upload parsing exception is only logged at debug level, and any other
 * exception ends the request after printStackTrace().
 *
 * NOTE(review): in this copy several string literals and the template markup between
 * scriptlets look truncated (the startsWith(...) header check and the pg.indexOf(...)
 * tag searches do not parse as written) -- confirm against the original file before
 * changing any logic here.
 * NOTE(review): serverPath is only prefix-checked against /data/ in this page; whether
 * Transcription.readFile and fromPlainTextFile reject ".." path segments is not
 * visible here -- confirm.
 * NOTE(review): sessionHash is assigned but not referenced again in the visible code.
 */ Logger logger = Logger.getLogger("transcript/splitpages"); try { //dumpCallInfo(request, logger); String sessionHash = Sessions.getSessionHash(request); String transcript = request.getParameter("text"); String serverPath = request.getParameter("serverPath"); boolean plainText = "true".equals(request.getParameter("plainText")); boolean symbolGreek = "true".equals(request.getParameter("symbolGreek")); boolean ignoreUnderdot = "true".equals(request.getParameter("ignoreUnderdot")); String format = request.getParameter("format"); int docID = -1; try { docID = Integer.parseInt(request.getParameter("docID")); } catch (Exception e) {} if (transcript != null && !symbolGreek) { transcript = Transcription.assureUnicode(transcript); } // See if we're receiving an upload of a transcription if (transcript == null) { try { DiskFileUpload fu 
= new DiskFileUpload(); // maximum size before a FileUploadException will be thrown fu.setSizeMax(99000000); // maximum size that will be stored in memory fu.setSizeThreshold(4096); // the location for saving data that is larger than getSizeThreshold() fu.setRepositoryPath(session.getServletContext().getRealPath("tmp/")); List fileItems = fu.parseRequest(request); //logger.info("transcript is null, checking file upload. fileItems, size: " + (fileItems != null ? fileItems.size() : 0)); for (int i = 0; i < fileItems.size(); ++i) { FileItem fi = (FileItem)fileItems.get(i); if (fi.isFormField()) { //logger.info("processing form field: " + fi.getFieldName() + " = " +fi.getString()); if ("docID".equals(fi.getFieldName())) { try { docID = Integer.parseInt(fi.getString()); } catch (Exception e) {} } if ("plainText".equals(fi.getFieldName())) { plainText = "true".equals(fi.getString()); } if ("symbolGreek".equals(fi.getFieldName())) { symbolGreek = "true".equals(fi.getString()); } if ("ignoreUnderdot".equals(fi.getFieldName())) { ignoreUnderdot = "true".equals(fi.getString()); } if ("format".equals(fi.getFieldName())) { format = fi.getString(); } } else { byte uploadBytes[] = fi.get(); transcript = new String(uploadBytes, symbolGreek ? "iso8859-1" : "UTF-8"); //logger.info("processing upload data, size: " + transcript.length()); } } } catch (Exception e) { logger.debug(e); } } //logger.info("transcript/splitpages(serverPath: " + serverPath + "; plainText: " + plainText + "; symbolGreek: " + symbolGreek + "; ignoreUnderdot: " + ignoreUnderdot + "; format: " + format + "; transcript, size: " + (transcript != null ? 
transcript.length() : -1)); Transcription transcription = null; // if we have a server path specified, then we will load our transcription from the server path // if (serverPath != null && serverPath.startsWith("/data/")) { // if we are already XML, then just read the file from the server into the transcript buffer // as if it was passed directly to us if (!plainText) { transcript = Transcription.readFile(serverPath); } // otherwise we need to do more processing and we have a Transcription static to do all of that else transcription = Transcription.fromPlainTextFile(serverPath, ignoreUnderdot, !symbolGreek); } // if we have a transcript buffer, we need to be sure it has a header. if (transcript != null && !symbolGreek && !transcript.trim().startsWith("" + transcript; //logger.info("transcript/splitpages: transcript.length(): " + (transcript != null ? transcript.length() : 0)); // if we have a transcript buffer, let's construct our transcription from the buffer now if (transcript != null && transcription == null) { if (!plainText) { transcription = new Transcription(transcript); } else transcription = Transcription.fromPlainText(transcript, ignoreUnderdot, !symbolGreek); } if (transcription != null) { //logger.info("transcript/splitpages: transcription.getPages().size(): " + transcription.getPages().size()); response.setContentType("text/xml"); %> <%=HTTPUtils.canonize(transcription.getHead())%> <% boolean first = true; for (String pg : transcription.getPages()) { // pg = Transcription.alignV11n(pg); // get ab tags to build verses StringBuffer abs = new StringBuffer(); for (int i = pg.indexOf(" -1; i = pg.indexOf("", i); if (e > -1) { String tag = pg.substring(i+1, e); String ref = Transcription.getAttribute(tag, "osisID"); if (ref == null || ref.length() < 1) ref = Transcription.getAttribute(tag, "n"); if (ref != null && ref.length() > 0) { if (abs.length() > 0) abs.append(";"); abs.append(ref); } } else break; } // get folio info String folio = null; String 
pageNum = null; long pageID = -1; int i = pg.indexOf("", i); if (e > -1) { String tag = pg.substring(i+1, e); folio = Transcription.getAttribute(tag, "n"); try { pageID = Long.parseLong(Transcription.getAttribute(tag, "xml:id")); } catch(Exception e2) {} String fType = Transcription.getAttribute(tag, "type"); String osisID = Transcription.getAttribute(tag, "osisID"); if (osisID != null && osisID.length() > 0) { abs = new StringBuffer(osisID); } String pageIDAttr = Transcription.getAttribute(tag, "pageID"); if (pageIDAttr != null && pageIDAttr.length() > 0) { try { pageID = Long.parseLong(pageIDAttr); } catch (Exception e2) {} } if ("page".equals(fType)) { pageNum = folio; folio = null; } } //logger.info("processed page: folio: " + folio + "; page: " + pageNum + "; verses: " + abs); if (first) { //logger.info("first page text: " + pg); first = false; } %> <%=pageNum != null ? "page=\""+pageNum+"\"" : "folio=\""+folio+"\""%>><%= "pagetext".equals(format) ? HTTPUtils.canonize(pg.toString()) : pg %> <% } %> <%=HTTPUtils.canonize(transcription.getTail())%> <% return; } } catch (Exception e) { e.printStackTrace(); return; } %> <%! 
/**
 * Debugging aid: writes a description of the incoming request to the given logger
 * at INFO level. It logs the content type, then one "name=value" line per header
 * name, per request attribute name, and per value of every request parameter.
 *
 * @param request the request to describe
 * @param logger  the logger that receives every line
 */
public static void dumpCallInfo(HttpServletRequest request, Logger logger) {
    logger.info("transcript/splitpages called...");
    logger.info("dumping headers.................................................");
    logger.info("request.getContentType: " + request.getContentType());

    // Each section works on a snapshot of the name enumeration taken before any line is logged.
    logger.info("Headers: ");
    java.util.List<?> headerNames = java.util.Collections.list(request.getHeaderNames());
    for (int h = 0; h < headerNames.size(); h++) {
        Object headerName = headerNames.get(h);
        String headerValue = request.getHeader(headerName.toString());
        logger.info(headerName + "=" + headerValue);
    }

    logger.info("Attributes: ");
    java.util.List<?> attributeNames = java.util.Collections.list(request.getAttributeNames());
    for (int a = 0; a < attributeNames.size(); a++) {
        Object attributeName = attributeNames.get(a);
        Object attributeValue = request.getAttribute(attributeName.toString());
        logger.info(attributeName + "=" + attributeValue);
    }

    logger.info("Parameters: ");
    java.util.List<?> parameterNames = java.util.Collections.list(request.getParameterNames());
    for (int p = 0; p < parameterNames.size(); p++) {
        Object parameterName = parameterNames.get(p);
        // A parameter may carry several values; each one gets its own line.
        String[] parameterValues = request.getParameterValues(parameterName.toString());
        for (int v = 0; v < parameterValues.length; v++) {
            logger.info(parameterName + "=" + parameterValues[v]);
        }
    }

    logger.info("end of dumping headers..........................................");
}
%>

transcript/splitpages

Split transcription XML into pages

Parameters

text: text of the transcription
docID: document ID (optional), to help with context
format: (xml) or pagetext
plainText: (false) or true; whether the specified input is plain text
symbolGreek: (false) or true; whether the specified input is Symbol Greek instead of Unicode
ignoreUnderdot: (false) or true; whether to specially process the underdot as marking unclear text