<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@ page trimDirectiveWhitespaces="true" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Feature" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Document" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Apparatus.Segment" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Page" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.ShelfInstance" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.ProjectManagement" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.ProjectManagement.Project" %>
<%@ page import="org.crosswire.community.projects.ntmss.data.Transcription" %>
<%@ page import="org.crosswire.xml.XMLBlock" %>
<%@ page import="org.crosswire.sword.keys.ListKey" %>
<%@ page import="org.crosswire.sword.keys.VerseKey" %>
<%@ page import="java.util.List" %>
<%@ page import="java.util.Collections" %>
<%@ page import="java.util.Arrays" %>
<%@ page import="java.util.List" %>
<%@ page import="java.util.ArrayList" %>
<%@ page import="org.apache.log4j.Logger" %>
<%@ page import="org.crosswire.webtools.annotation.*" %>
<%@ page import="org.crosswire.webtools.*" %>
<%!

@Description(value = "Perform sanity checks on various parts of the dataset", name = "statistics/integritycheck")
public static class MyParameters extends Parameters<MyParameters> {
	protected ProjectManagement.Project project = null;

	@Description(value = "Shows pages with verseCount > saneLimit", defaultValue = "false", example = "true for default saneLimit of 50 or e.g., 100 for custom limit")
	public Integer verseMax = null;

	@Description(value = "Shows documents with pages not assigned to a shelf instance", defaultValue = "false", example = "true")
	public Boolean pageNoShelf = false;

	@Description(value = "Shows shelf instances with no assigned pages", defaultValue = "false", example = "true")
	public Boolean shelfNoPage = false;

	@Description(value = "Shows descrepencies between index data and transcription", defaultValue = "false", example = "true")
	public Boolean indexToTranscription = false;

	@Description(value = "Shows apparatus readings with odd witness count", defaultValue = "false", example = "true")
	public Boolean appReadingWitnessCount = false;

	@Description(value = "Shows apparatus readings with zero witness count, excluding 'a' readings", defaultValue = "false", example = "true")
	public Boolean appReadingWitnessZero = false;

	@Description(value = "Limit results to docID min", example = "10000")
	public Integer docIDMin = null;

	@Description(value = "Limit results to docID max", example = "19999")
	public Integer docIDMax = null;

	@Description(value = "Limit checks to a Document Group", example = "1")
	public Integer documentGroupID;

	@Description(value = "Limit checks to a Project ID", defaultValue = "1", example = "43")
	public Integer projectID = 1;

	@Description(value = "Limit checks to a Project Name", example = "ECM Matthew")
	public String projectName;

	@Description(value = "Show this usage help", example = "true", defaultValue = "false")
	public Boolean help = false;

// <tr><td><b>format</b></td><td>(html) | xml</td></tr>

	@Override
	protected void afterLoad() {
		if (verseMax == null && "true".equals(request.getParameter("verseMax"))) verseMax = 50;
	}

	@Override
	protected void customValidation() {
		projectName = Transcription.assureUnicode(projectName);
		if (projectID != null || projectName != null) {
			project = projectID != null ? ProjectManagement.getProject(projectID) : ProjectManagement.getProject(projectName);
			if (project == null) {
				addError(-7, "Project not found.");
				return;
			}
		}
		if (project != null && documentGroupID == null) {
			documentGroupID = project.getDocumentGroupID();
		}
	}
}

%>

<%

MyParameters params = new MyParameters().loadFromRequest(request, response, false);

if (params.getErrors().size() == 0 && !params.help) {

int checkCount = 0;
String format = request.getParameter("format");
if (format == null) format = "html";

boolean html = "html".equals(format);

StringBuffer results = new StringBuffer();

String baseURL = request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort(); // + request.getContextPath();

if (params.verseMax != null) {
	++checkCount;
	Page query = new Page();
	String sql = "SELECT DOCUMENTID, PAGEID, COUNT(*) VERSECOUNT, MIN(USERID) INDEXER from BIBLICALCONTENT WHERE 1=1";
	sql += " AND PAGEID < 99999";
	if (params.docIDMin != null) sql += " AND DOCUMENTID >= " + params.docIDMin;
	if (params.docIDMax != null) sql += " AND DOCUMENTID <= " + params.docIDMax;
	sql += " GROUP BY DOCUMENTID, PAGEID";
	sql += " HAVING VERSECOUNT > "+params.verseMax;
	sql += " ORDER BY VERSECOUNT DESC";
	List<Page> rows = query.getDataSet(sql);
	if (html) {
		results.append("<h1>Verse Max - potential problems: " + rows.size() + "</h1>");
		results.append("<table><tbody><tr><th>DocID</th><th>PageID</th><th>Verse Count</th><th>Indexer</th><th>FixMeURL</th></tr>");
	}
	else {
		results.append("<verseMax count=\""+rows.size()+"\">");
	}
	for (Page p : rows) {
		if (html) {
			results.append("<tr><td>"+p.getDocumentID()+"</td><td>"+p.getPageID()+"</td><td>"+p.getLongValue("VERSECOUNT")+"</td>");
			results.append("<td><a target=\"_blank\" href=\""+baseURL+"/web/"+p.getStringValue("INDEXER")+"\">"+p.getStringValue("INDEXER")+"</a></td>");
			results.append("<td><a target=\"_blank\" href=\""+baseURL+"/indexing/?docID="+p.getDocumentID()+"&amp;pageID="+p.getPageID()+"\">fix me</a></td>");
			results.append("</tr>");
		}
		else {
			results.append("<page docID=\""+p.getDocumentID()+"\" pageID=\""+p.getPageID()+"\" verseCount=\""+p.getLongValue("VERSECOUNT")+"\"");
			results.append(" indexer=\""+p.getStringValue("INDEXER")+"\"");
			results.append(" fixMeURL=\""+baseURL+"/indexing/?docID="+p.getDocumentID()+"&amp;pageID="+p.getPageID()+"\"");
			results.append(" />");
		}
	}
	if (html) results.append("</tbody></table>"); else results.append("</verseMax>");

}

if (params.shelfNoPage) {
	++checkCount;
	ShelfInstance query = new ShelfInstance();
	List<ShelfInstance> rows = query.getDataSet("SELECT * from SHELFINSTANCE SI join INSTITUTION I on I.INSTITUTIONID=SI.INSTITUTIONID WHERE not exists(select 1 from SHELFINSTANCEPAGES SIP where SIP.SHELFID=SI.SHELFID) ORDER BY I.COUNTRY, I.PLACE, I.NAME");
	if (html) {
		results.append("<h1>Shelf Instances with no pages assigned - potential problems: " + rows.size() + "</h1>");
		results.append("<table><tbody><tr><th>ShelfID</th><th>Country</th><th>Place</th><th>Institution Name</th><th>Shelf Number</th><th>FixMeURL</th></tr>");
	}
	else {
		results.append("<shelfNoPage count=\""+rows.size()+"\">");
	}
	for (ShelfInstance si : rows) {
		if (html) {
			results.append("<tr><td>"+si.getShelfInstanceID()+"</td><td>"+si.getCountry()+"</td><td>"+si.getPlace()+"</td><td>"+si.getInstitutionName()+"</td><td>"+si.getShelfNumber()+"</td>");
			results.append("<td><a target=\"_blank\" href=\""+baseURL+"/group/vmr-administration/page-and-image/?instID="+si.getInstitutionID()+"&shelfID="+si.getShelfInstanceID()+"\">fix me</a></td>");
			results.append("</tr>");
		}
		else {
			XMLBlock b = XMLBlock.createXMLBlock(si.toFormattedXML());
			b.setAttribute("fixMeURL", baseURL+"/group/vmr-administration/page-and-image/?instID="+si.getInstitutionID()+"&amp;shelfID="+si.getShelfInstanceID());
			results.append(b.toString());
		}
	}
	if (html) results.append("</tbody></table>"); else results.append("</shelfNoPage>");

}

if (params.pageNoShelf) {
	++checkCount;
	Document query = new Document();
	String sql = "SELECT D.*, count(1) TOTALUNASSIGNEDPAGES, min(PAGEID) MINUNASSIGNEDPAGEID from DOCUMENT D join PAGE P on P.DOCUMENTID=D.DOCUMENTID WHERE not exists(select 1 from SHELFINSTANCEPAGES SIP where SIP.DOCUMENTID=P.DOCUMENTID and SIP.PAGEID=P.PAGEID)";
	if (params.docIDMin != null) sql += " AND D.DOCUMENTID >= " + params.docIDMin;
	if (params.docIDMax != null) sql += " AND D.DOCUMENTID <= " + params.docIDMax;
	List<Document> rows = query.getDataSet(sql + " GROUP BY D.DOCUMENTID ORDER BY TOTALUNASSIGNEDPAGES DESC");
	if (html) {
		results.append("<h1>Documents with pages unassigned to shelf instances - potential problems: " + rows.size() + "</h1>");
		results.append("<table><tbody><tr><th>DocID</th><th>Unassigned Page Count</th><th>First Unassigned PageID</th><th>FixMeURL</th></tr>");
	}
	else {
		results.append("<pageNoShelf count=\""+rows.size()+"\">");
	}
	for (Document d : rows) {
		if (html) {
			results.append("<tr><td>"+d.getDocumentID()+"</td><td>"+d.getLongValue("TOTALUNASSIGNEDPAGES")+"</td><td>"+d.getLongValue("MINUNASSIGNEDPAGEID")+"</td>");
			results.append("<td><a target=\"_blank\" href=\""+baseURL+"/group/vmr-administration/page-and-image/?docID="+d.getDocumentID()+"\">fix me</a></td>");
			results.append("</tr>");
		}
		else {
			results.append("<doc docID=\""+d.getDocumentID()+"\" totalUnassignedPages=\""+d.getLongValue("TOTALUNASSIGNEDPAGES")+"\" firstUnassignedPageID=\""+d.getLongValue("MINUNASSIGNEDPAGEID")+"\"");
			results.append(" fixMeURL=\""+baseURL+"/group/vmr-administration/page-and-image/?docID="+d.getDocumentID()+"\"");
			results.append(" />");
		}
	}
	if (html) results.append("</tbody></table>"); else results.append("</pageNoShelf>");

}
if (params.indexToTranscription) {
	++checkCount;
	Page query = new Page();
	String sql = "SELECT D.*, P.*, count(BC.VERSE) VERSECOUNTBC from DOCUMENT D join PAGE P on P.DOCUMENTID=D.DOCUMENTID right join BIBLICALCONTENT BC on BC.DOCUMENTID=P.DOCUMENTID and BC.PAGEID=P.PAGEID WHERE 1=1";
	if (params.docIDMin != null) sql += " AND D.DOCUMENTID >= " + params.docIDMin;
	if (params.docIDMax != null) sql += " AND D.DOCUMENTID <= " + params.docIDMax;
	sql += " GROUP BY P.DOCUMENTID, P.PAGEID";

	List<Page> rows = query.getDataSet(sql);

	List<Page> problems = new ArrayList<Page>();
	for (Page p : rows) {
		ListKey ic = p.getIndexContent();
		List<Integer> icVerses = new ArrayList<Integer>();
		ListKey tic = p.getIndexContentFromTranscription(false);
		List<Integer> ticVerses = new ArrayList<Integer>();
		for (ic.setPosition(ic.TOP); ic.popError() == 0; ic.increment()) {
			int ich = new VerseKey(ic).getHashNumber();
			if (ich % 1000 != 0 || ich % 1000000 == 0) {
				icVerses.add(new VerseKey(ic).getHashNumber());
			}
		}
		for (tic.setPosition(ic.TOP); tic.popError() == 0; tic.increment()) { ticVerses.add(new VerseKey(tic).getHashNumber()); }
		Collections.sort(icVerses);
		Collections.sort(ticVerses);
		boolean same = Arrays.equals(icVerses.toArray(new Integer[0]), ticVerses.toArray(new Integer[0]));

		if ((ticVerses.size() != 0 && icVerses.size() != 0) && !same) problems.add(p);
	}
	if (html) {
		results.append("<h1>Pages with transcription verses which do not match index content; problems: " + problems.size() + "</h1>");
		results.append("<table><tbody><tr><th>DocID</th><th>PageID</th><th>indexContent</th><th>transcriptionIndexContent</th><th>Check Bibles</th><th>FixMeURL Indexing</th><th>FixMeURL Transcription</th></tr>");
	}
	else {
		results.append("<indexToTranscription count=\""+problems.size()+"\">");
	}
	for (Page d : problems) {
		if (html) {
			results.append("<tr><td>"+d.getDocumentID()+"</td><td>"+d.getPageID()+"</td><td>"+d.getIndexContent().getShortRangeText()+"</td><td>"+d.getIndexContentFromTranscription(false).getShortRangeText()+"</td>");
			results.append("<td><a target=\"_blank\" href=\"https://crosswire.org/study/parallelstudy.jsp?del=all&add=CopSahBible2&add=WLC&add=LXX&add=NASB&key="+d.getIndexContent().getShortRangeText()+"#cv\">Bibles</a></td>");
			results.append("<td><a target=\"_blank\" href=\""+baseURL+"/indexing?docID="+d.getDocumentID()+"&pageID="+d.getPageID()+"\">fix me: indexing</a></td>");
			results.append("<td><a target=\"_blank\" href=\""+baseURL+"/transcribing?docID="+d.getDocumentID()+"&pageID="+d.getPageID()+"\">fix me: transcription</a></td>");
			results.append("</tr>");
		}
		else {
			results.append("<doc docID=\""+d.getDocumentID()+"\" totalUnassignedPages=\""+d.getLongValue("TOTALUNASSIGNEDPAGES")+"\" firstUnassignedPageID=\""+d.getLongValue("MINUNASSIGNEDPAGEID")+"\"");
			results.append(" fixMeURL=\""+baseURL+"/group/vmr-administration/page-and-image/?docID="+d.getDocumentID()+"\"");
			results.append(" />");
		}
	}
	if (html) results.append("</tbody></table>"); else results.append("</indexToTranscription>");

}
if (params.appReadingWitnessCount) {
	int amax = 10;
	int zzmax = 10;
	++checkCount;
	Segment query = new Segment();

	String sql = "select VERSE, CONTEXTDESCRIPTION, (select count(1) from SEGMENTREADING SR LEFT JOIN SEGMENTREADINGWITNESS SRW on SRW.SEGMENTREADINGID=SR.SEGMENTREADINGID where SR.SEGMENTID=S.SEGMENTID and SR.READINGNAME='a' and SRW.PROJECTID="+params.projectID+") ACOUNT, (select count(1) from SEGMENTREADING SR LEFT JOIN SEGMENTREADINGWITNESS SRW on SRW.SEGMENTREADINGID=SR.SEGMENTREADINGID where SR.SEGMENTID=S.SEGMENTID and SR.READINGNAME='zz' and SRW.PROJECTID="+params.projectID+") ZZCOUNT from SEGMENT S left join SEGMENTGROUPSEGMENT SG on SG.SEGMENTID=S.SEGMENTID WHERE SEGMENTGROUPID="+params.projectID+" HAVING ACOUNT <= " + amax + " or ZZCOUNT<= " + zzmax + " ORDER BY VERSE, CONTEXTDESCRIPTION;";

	List<Segment> rows = query.getDataSet(sql);
	Collections.sort(rows);

	if (html) {
		results.append("<h1>Segments with 'a' reading witness count <= " + amax + " or 'zz' witness count <= " + zzmax + " - potential problems (or overlapping segments): " + rows.size() + "</h1>");
		results.append("<table><tbody><tr><th>Verse</th><th>Context Description</th><th>'a' wit<br/>count</th><th>'zz' wit<br/>count</th><th>FixMeURL</th></tr>");
	}
	else {
		results.append("<appReadingWitnessCount count=\""+rows.size()+"\">");
	}
	for (Segment d : rows) {
		if (html) {
			results.append("<tr><td>"+d.getVerseTextShort()+"</td><td>"+d.getContextDescription()+"</td><td>"+d.getLongValue("ACOUNT")+"</td><td>"+d.getLongValue("ZZCOUNT")+"</td>");
			results.append("<td><a target=\"_blank\" href=\""+baseURL+"/web/ecm-matthew/apparatus-manual-editor/?verse="+d.getVerseOSISRef()+"&segment="+d.getContextDescription() +"\">fix me</a></td>");
			results.append("</tr>");
		}
		else {
			results.append("<segment verseHash=\""+d.getVerseHash()+"\" contextDescription=\""+d.getContextDescription()+"\" aCount=\""+d.getLongValue("ACOUNT")+"\" zzCount=\""+d.getLongValue("ZZCOUNT")+"\"");
			results.append(" fixMeURL=\""+baseURL+"/web/ecm-matthew/apparatus-manual-editor/?verse="+d.getVerseOSISRef()+"&amp;segment="+d.getContextDescription() +"\"");
			results.append(" />");
		}
	}
	if (html) results.append("</tbody></table>"); else results.append("</appReadingWitnessCount>");


}
if (params.appReadingWitnessZero) {
	++checkCount;
	Segment query = new Segment();

	String sql = "select VERSE, CONTEXTDESCRIPTION, READINGNAME, READING, SR.CREATEDATE, SR.USERID, SR.SEGMENTREADINGID, SR.PROJECTID from SEGMENTREADING SR left join SEGMENT S on S.SEGMENTID=SR.SEGMENTID left join SEGMENTGROUPSEGMENT SGS on SGS.SEGMENTID=SR.SEGMENTID where not exists (select 1 from SEGMENTREADINGWITNESS SRW where SRW.SEGMENTID=SR.SEGMENTID and SRW.SEGMENTREADINGID=SR.SEGMENTREADINGID) and VERSE between 2001000000 and 2002000000 and SEGMENTGROUPID=" + params.projectID + " and READINGNAME <> 'a'";

	List<Segment> rows = query.getDataSet(sql);
	Collections.sort(rows);

	if (html) {
		results.append("<h1>Segments with non-'a' reading witness count = 0; count: " + rows.size() + "</h1>");
		results.append("<table><tbody><tr><th>Verse</th><th>Context Description</th><th>Reading<br/>Label</th><th>Reading<br/>Text</th><th>Create<br/>Date</th><th>User</th><th>ProjectID</th><th>FixMeURL</th><th>ShowMeURL</th><th>DeleteMeURL</th></tr>");
	}
	else {
		results.append("<appReadingWitnessCount count=\""+rows.size()+"\">");
	}
	for (Segment d : rows) {
		if (html) {
			results.append("<tr><td>"+d.getVerseTextShort()+"</td><td>"+d.getContextDescription()+"</td><td>"+d.getStringValue("READINGNAME")+"</td><td>"+d.getStringValue("READING")+"</td><td>"+d.getStringValue("CREATEDATE")+"</td><td>"+d.getStringValue("USERID")+"</td><td>"+d.getStringValue("PROJECTID")+"</td>");
			results.append("<td><a target=\"_blank\" href=\""+baseURL+"/web/ecm-matthew/apparatus-master-editor/?indexContent="+d.getVerseOSISRef()+"&segment="+d.getContextDescription() +"\">fix me</a></td>");
			results.append("<td><a target=\"_blank\" href=\""+baseURL+"/community/vmr/api/variant/apparatus/get/?indexContent="+d.getVerseOSISRef()+"&includeBaseline=ECM&segmentGroupID="+params.projectID+"&segment="+d.getContextDescription()+"&format=html\">show me</a></td>");
			results.append("<td><a target=\"_blank\" href=\"#\" onclick=\"deleteReading(this); return false;\" data-readingid=\""+d.getIntValue("SEGMENTREADINGID")+"\">delete me</a></td>");
			results.append("</tr>");
		}
		else {
			results.append("<segment verseHash=\""+d.getVerseHash()+"\" contextDescription=\""+d.getContextDescription()+"\" aCount=\""+d.getLongValue("ACOUNT")+"\" zzCount=\""+d.getLongValue("ZZCOUNT")+"\"");
			results.append(" fixMeURL=\""+baseURL+"/web/ecm-matthew/apparatus-manual-editor/?verse="+d.getVerseOSISRef()+"&amp;segment="+d.getContextDescription() +"\"");
			results.append(" />");
		}
	}
	if (html) {
		results.append("</tbody></table>");
		results.append("<script>");

		results.append("var apiBase = '" + baseURL + "/community/vmr/api/';");
		results.append("function deleteReading(e) {");
		results.append("var rid = $(e).attr('data-readingid');");
		results.append("$.post(apiBase + 'variant/reading/delete/', { segmentReadingID : rid }, function(xml) {");
		results.append("var user = $(xml).find('user');");
		results.append("$(e).parent().text('done.');");
		results.append("});");
		results.append("}");

		results.append("</script>");
	}
	else results.append("</appReadingWitnessCount>");


}
if (checkCount > 0) {
	if (html) {
%>
<html>
<head>
	<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
<style>
thead tr {
    background-color: rgb(240, 240, 240);
}

thead tr {
    font-weight: bold;
}

th, td {
    border-color: rgb(221, 221, 221);
    border-style: solid;
    border-width: 0.8px;
    padding: 10px 7px 15px 7px;
    text-align: left;
}

tbody th {
    background-color: rgb(240, 240, 240);
    font-weight: normal;
}

table {
    border-collapse: collapse;
}
</style>
</head>
<body>
<%=results%>
</body>
</html>
<%
	}
	else {
		response.setContentType("text/xml");
%>
<integrityCheck checkCount="<%=checkCount%>">
<%=results%>
</integrityCheck>
<%
	}
	return;
}
}
params.format = "html";
Serializer.reportErrors(request, response, out, params, true);
%>