<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@ page trimDirectiveWhitespaces="true" %>
<%@ page import="org.crosswire.utils.Utils" %>
<%@ page import="org.crosswire.utils.Sessions" %>
<%@ page import="org.crosswire.xml.XMLBlock" %>
<%@ page import="org.crosswire.xml.XMLTag" %>
<%@ page import="org.crosswire.repo.VersionedRepo" %>
<%@ page import="org.crosswire.utils.HTTPUtils" %>
<%@ page import="org.crosswire.sword.keys.VerseKey" %>
<%@ page import="org.crosswire.sword.keys.ListKey" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Transcription" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Document" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Page" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Transcription.Convert" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Transcription.Convert.VerseTranscription" %>
<%@ page import="java.util.Map" %>
<%@ page import="java.util.Vector" %>
<%@ page import="org.apache.log4j.Logger" %>
<%@ page import="org.apache.commons.fileupload.*" %>
<%@ page import="java.util.List" %>
<%
// transcript/splitpages
//
// Builds a Transcription from one of three inputs and writes it back as a
// <split> XML document with one <page> element per page:
//   1. the "text" request parameter,
//   2. a multipart file upload (used only when "text" is absent),
//   3. a file on the server named by "serverPath" (must live under /data/).
// If no transcription can be built, nothing is written here and the static
// help page that follows this scriptlet is rendered instead.

Logger logger = Logger.getLogger("transcript/splitpages");

try {

//dumpCallInfo(request, logger);

// The value is not used below; the call is kept in case it has session side effects.
String sessionHash = Sessions.getSessionHash(request);

String transcript = request.getParameter("text");
String serverPath = request.getParameter("serverPath");
boolean plainText = "true".equals(request.getParameter("plainText"));
boolean symbolGreek = "true".equals(request.getParameter("symbolGreek"));
boolean ignoreUnderdot = "true".equals(request.getParameter("ignoreUnderdot"));
String format = request.getParameter("format");
// docID is optional; a missing or malformed value leaves it at -1.
// NOTE(review): docID is parsed but not consulted anywhere in this page.
int docID = -1;
try { docID = Integer.parseInt(request.getParameter("docID")); } catch (Exception ignored) { /* optional parameter */ }
if (transcript != null && !symbolGreek) { transcript = Transcription.assureUnicode(transcript); }

// See if we're receiving an upload of a transcription
if (transcript == null) {
try {
	DiskFileUpload fu = new DiskFileUpload();
	// maximum size before a FileUploadException will be thrown
	fu.setSizeMax(99000000);
	// maximum size that will be stored in memory
	fu.setSizeThreshold(4096);
	// the location for saving data that is larger than getSizeThreshold()
	fu.setRepositoryPath(session.getServletContext().getRealPath("tmp/"));

	List fileItems = fu.parseRequest(request);

	// Keep the raw bytes and decode them only after every form field has
	// been read: the charset depends on symbolGreek, and that field may
	// appear after the file part in the multipart body.
	byte uploadBytes[] = null;
	for (int idx = 0; idx < fileItems.size(); ++idx) {
		FileItem fi = (FileItem)fileItems.get(idx);
		if (fi.isFormField()) {
			String fieldName = fi.getFieldName();
			if ("docID".equals(fieldName)) {
				try { docID = Integer.parseInt(fi.getString()); } catch (Exception ignored) { /* optional field */ }
			}
			if ("plainText".equals(fieldName)) { plainText = "true".equals(fi.getString()); }
			if ("symbolGreek".equals(fieldName)) { symbolGreek = "true".equals(fi.getString()); }
			if ("ignoreUnderdot".equals(fieldName)) { ignoreUnderdot = "true".equals(fi.getString()); }
			if ("format".equals(fieldName)) { format = fi.getString(); }
		}
		else {
			// if several files are sent, the last one wins
			uploadBytes = fi.get();
		}
	}
	if (uploadBytes != null) {
		transcript = new String(uploadBytes, symbolGreek ? "iso8859-1" : "UTF-8");
	}
} catch (Exception e) {
	// Best effort: a request without a usable upload simply leaves transcript null.
	logger.debug("transcript/splitpages: no usable file upload in request", e);
}
}


Transcription transcription = null;

// if we have a server path specified, then we will load our transcription from the server path
//
if (serverPath != null && serverPath.startsWith("/data/")) {
	// serverPath comes straight from the request; refuse ".." segments so
	// that the "/data/" prefix check cannot be bypassed (e.g. /data/../etc/...).
	if (serverPath.contains("/../") || serverPath.endsWith("/..")) {
		logger.warn("transcript/splitpages: rejected serverPath containing '..': " + serverPath);
	}
	// if we are already XML, then just read the file from the server into the transcript buffer
	// as if it was passed directly to us
	else if (!plainText) {
		transcript = Transcription.readFile(serverPath);
	}
	// otherwise we need to do more processing and we have a Transcription static to do all of that
	else {
		transcription = Transcription.fromPlainTextFile(serverPath, ignoreUnderdot, !symbolGreek);
	}
}

// if we have a transcript buffer, we need to be sure it has a header.
if (transcript != null && !symbolGreek && !transcript.trim().startsWith("<?")) transcript = "<?xml  version=\"1.0\" encoding=\"utf-8\"?>" + transcript;

// if we have a transcript buffer, let's construct our transcription from the buffer now
if (transcript != null && transcription == null) {
	if (!plainText) {
		transcription = new Transcription(transcript);
	}
	else {
		transcription = Transcription.fromPlainText(transcript, ignoreUnderdot, !symbolGreek);
	}
}


if (transcription != null) {

	response.setContentType("text/xml");
	
%>
<split docID="<%=transcription.getDocID()%>">
<head><%=HTTPUtils.canonize(transcription.getHead())%></head>
<pages>
<%
	for (String pg : transcription.getPages()) {

		// Collect the reference of every <ab> tag on this page, separated by ';'.
		// The osisID attribute is preferred; "n" is the fallback.
		StringBuilder abs = new StringBuilder();
		for (int abStart = pg.indexOf("<ab "); abStart > -1; abStart = pg.indexOf("<ab ", abStart+1)) {
			int abEnd = pg.indexOf(">", abStart);
			if (abEnd < 0) break;	// unterminated tag: stop scanning this page

			String abTag = pg.substring(abStart+1, abEnd);
			String ref = Transcription.getAttribute(abTag, "osisID");
			if (ref == null || ref.length() < 1) ref = Transcription.getAttribute(abTag, "n");

			if (ref != null && ref.length() > 0) {
				if (abs.length() > 0) abs.append(";");
				abs.append(ref);
			}
		}

		// get folio info from the page's <pb> tag
		String folio = null;
		String pageNum = null;
		long pageID = -1;
		int pbStart = pg.indexOf("<pb ");
		// Only look for the closing '>' when a <pb> tag exists. indexOf with a
		// negative start index searches from 0, which would otherwise make the
		// text before the first '>' on the page be parsed as if it were the <pb> tag.
		int pbEnd = (pbStart > -1) ? pg.indexOf(">", pbStart) : -1;
		if (pbEnd > -1) {
			String pbTag = pg.substring(pbStart+1, pbEnd);
			folio = Transcription.getAttribute(pbTag, "n");
			// xml:id is used as the page id when numeric; a parseable pageID attribute overrides it below
			try { pageID = Long.parseLong(Transcription.getAttribute(pbTag, "xml:id")); } catch (Exception ignored) { /* absent or non-numeric */ }
			String fType = Transcription.getAttribute(pbTag, "type");
			// an osisID on the <pb> tag replaces the verse list gathered from the <ab> tags
			String osisID = Transcription.getAttribute(pbTag, "osisID");
			if (osisID != null && osisID.length() > 0) {
				abs = new StringBuilder(osisID);
			}
			String pageIDAttr = Transcription.getAttribute(pbTag, "pageID");
			if (pageIDAttr != null && pageIDAttr.length() > 0) {
				try { pageID = Long.parseLong(pageIDAttr); } catch (Exception ignored) { /* keep previous value */ }
			}
			// type="page" means "n" holds a page number rather than a folio
			if ("page".equals(fType)) {
				pageNum = folio; folio = null;
			}
		}
		
%>
<page verses="<%=HTTPUtils.canonize(abs.toString())%>" <%=(pageID != -1 ? ("pageID=\""+pageID+"\" "):"")%> <%=pageNum != null ? "page=\""+pageNum+"\"" : "folio=\""+folio+"\""%>><%= "pagetext".equals(format) ? HTTPUtils.canonize(pg) : pg %></page>
<%
	}
%>
</pages>
<tail><%=HTTPUtils.canonize(transcription.getTail())%></tail>
</split>
<%
	// XML response is complete; skip the HTML help page below
	return;
}

} catch (Exception e) {
	// Log through the page logger (with stack trace) instead of stderr.
	logger.error("transcript/splitpages: request failed", e);
	return;
}
%>
<%!
/**
 * Debug helper: writes the request's content type, every header, every
 * request attribute and every parameter value to the given logger at
 * INFO level, one "name=value" line per entry.
 *
 * @param request the request to describe
 * @param logger  the logger that receives the output
 */
public static void dumpCallInfo(HttpServletRequest request, Logger logger) {
	logger.info("transcript/splitpages called...");
	logger.info("dumping headers.................................................");
	logger.info("request.getContentType: " + request.getContentType());

	logger.info("Headers: ");
	java.util.Enumeration<?> headerNames = request.getHeaderNames();
	while (headerNames.hasMoreElements()) {
		Object header = headerNames.nextElement();
		logger.info(header + "=" + request.getHeader(header.toString()));
	}

	logger.info("Attributes: ");
	java.util.Enumeration<?> attributeNames = request.getAttributeNames();
	while (attributeNames.hasMoreElements()) {
		Object attribute = attributeNames.nextElement();
		logger.info(attribute + "=" + request.getAttribute(attribute.toString()));
	}

	logger.info("Parameters: ");
	java.util.Enumeration<?> parameterNames = request.getParameterNames();
	while (parameterNames.hasMoreElements()) {
		Object parameter = parameterNames.nextElement();
		// a parameter may carry several values; log each on its own line
		String[] values = request.getParameterValues(parameter.toString());
		for (int k = 0; k < values.length; k++) {
			logger.info(parameter + "=" + values[k]);
		}
	}

	logger.info("end of dumping headers..........................................");
}
%>
<html>
<body>
<h1>transcript/splitpages</h1>
<p>Split transcription XML into pages</p>
<h3>Parameters</h3>
<table border="1">
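<tr><td><b>text</b></td><td>text of transcription; may instead be sent as a multipart file upload</td></tr>
<tr><td><b>serverPath</b></td><td>(optional) path of a transcription file on the server; must start with /data/ and, when given, is used instead of <b>text</b></td></tr>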
<tr><td><b>docID</b></td><td>docID, (optional) to help with context</td></tr>
<tr><td><b>format</b></td><td>(xml), pagetext</td></tr>
<tr><td><b>plainText</b></td><td>(false) or true, whether the specified input is plaintext or not</td></tr>
<tr><td><b>symbolGreek</b></td><td>(false) or true, whether the specified input is symbol greek instead of Unicode</td></tr>
<tr><td><b>ignoreUnderdot</b></td><td>(false) or true, whether or not to specially process the underdot as unclear</td></tr>
</table>
</body>
</html>