<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%> <%@ page trimDirectiveWhitespaces="true" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Feature" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Document" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Apparatus.Segment" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Page" %> <%@ page import="org.crosswire.community.projects.ntmss.data.ShelfInstance" %> <%@ page import="org.crosswire.community.projects.ntmss.data.ProjectManagement" %> <%@ page import="org.crosswire.community.projects.ntmss.data.ProjectManagement.Project" %> <%@ page import="org.crosswire.community.projects.ntmss.data.Transcription" %> <%@ page import="org.crosswire.xml.XMLBlock" %> <%@ page import="org.crosswire.sword.keys.ListKey" %> <%@ page import="org.crosswire.sword.keys.VerseKey" %> <%@ page import="java.util.List" %> <%@ page import="java.util.Collections" %> <%@ page import="java.util.Arrays" %> <%@ page import="java.util.List" %> <%@ page import="java.util.ArrayList" %> <%@ page import="org.apache.log4j.Logger" %> <%@ page import="org.crosswire.webtools.annotation.*" %> <%@ page import="org.crosswire.webtools.*" %> <%! @Description(value = "Perform sanity checks on various parts of the dataset", name = "statistics/integritycheck") public static class MyParameters extends Parameters<MyParameters> { protected ProjectManagement.Project project = null; @Description(value = "Shows pages with verseCount > saneLimit", defaultValue = "false", example = "true for default saneLimit of 50 or e.g., 100 for custom limit") public Integer verseMax = null; @Description(value = "Shows documents with pages not assigned to a shelf instance", defaultValue = "false", example = "true") public Boolean pageNoShelf = false; @Description(value = "Shows shelf instances with no assigned pages", defaultValue = "false", example = "true") public Boolean shelfNoPage = false; @Description(value = "Shows descrepencies between index data and transcription", defaultValue = "false", example = "true") public Boolean indexToTranscription = false; @Description(value = "Shows apparatus readings with odd witness count", defaultValue = "false", example = "true") public Boolean appReadingWitnessCount = false; @Description(value = "Shows apparatus readings with zero witness count, excluding 'a' readings", defaultValue = "false", example = "true") public Boolean appReadingWitnessZero = false; @Description(value = "Limit results to docID min", example = "10000") public Integer docIDMin = null; @Description(value = "Limit results to docID max", example = "19999") public Integer docIDMax = null; @Description(value = "Limit checks to a Document Group", example = "1") public Integer documentGroupID; @Description(value = "Limit checks to a Project ID", defaultValue = "1", example = "43") public Integer projectID = 1; @Description(value = "Limit checks to a Project Name", example = "ECM Matthew") public String projectName; @Description(value = "Show this usage help", example = "true", defaultValue = "false") public Boolean help = false; // <tr><td><b>format</b></td><td>(html) | xml</td></tr> @Override protected void afterLoad() { if (verseMax == null && "true".equals(request.getParameter("verseMax"))) verseMax = 50; } @Override protected void customValidation() { projectName = Transcription.assureUnicode(projectName); if (projectID != null || projectName != null) { project = projectID != null ? ProjectManagement.getProject(projectID) : ProjectManagement.getProject(projectName); if (project == null) { addError(-7, "Project not found."); return; } } if (project != null && documentGroupID == null) { documentGroupID = project.getDocumentGroupID(); } } } %> <% MyParameters params = new MyParameters().loadFromRequest(request, response, false); if (params.getErrors().size() == 0 && !params.help) { int checkCount = 0; String format = request.getParameter("format"); if (format == null) format = "html"; boolean html = "html".equals(format); StringBuffer results = new StringBuffer(); String baseURL = request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort(); // + request.getContextPath(); if (params.verseMax != null) { ++checkCount; Page query = new Page(); String sql = "SELECT DOCUMENTID, PAGEID, COUNT(*) VERSECOUNT, MIN(USERID) INDEXER from BIBLICALCONTENT WHERE 1=1"; sql += " AND PAGEID < 99999"; if (params.docIDMin != null) sql += " AND DOCUMENTID >= " + params.docIDMin; if (params.docIDMax != null) sql += " AND DOCUMENTID <= " + params.docIDMax; sql += " GROUP BY DOCUMENTID, PAGEID"; sql += " HAVING VERSECOUNT > "+params.verseMax; sql += " ORDER BY VERSECOUNT DESC"; List<Page> rows = query.getDataSet(sql); if (html) { results.append("<h1>Verse Max - potential problems: " + rows.size() + "</h1>"); results.append("<table><tbody><tr><th>DocID</th><th>PageID</th><th>Verse Count</th><th>Indexer</th><th>FixMeURL</th></tr>"); } else { results.append("<verseMax count=\""+rows.size()+"\">"); } for (Page p : rows) { if (html) { results.append("<tr><td>"+p.getDocumentID()+"</td><td>"+p.getPageID()+"</td><td>"+p.getLongValue("VERSECOUNT")+"</td>"); results.append("<td><a target=\"_blank\" href=\""+baseURL+"/web/"+p.getStringValue("INDEXER")+"\">"+p.getStringValue("INDEXER")+"</a></td>"); results.append("<td><a target=\"_blank\" href=\""+baseURL+"/indexing/?docID="+p.getDocumentID()+"&pageID="+p.getPageID()+"\">fix me</a></td>"); results.append("</tr>"); } else { results.append("<page docID=\""+p.getDocumentID()+"\" pageID=\""+p.getPageID()+"\" verseCount=\""+p.getLongValue("VERSECOUNT")+"\""); results.append(" indexer=\""+p.getStringValue("INDEXER")+"\""); results.append(" fixMeURL=\""+baseURL+"/indexing/?docID="+p.getDocumentID()+"&pageID="+p.getPageID()+"\""); results.append(" />"); } } if (html) results.append("</tbody></table>"); else results.append("</verseMax>"); } if (params.shelfNoPage) { ++checkCount; ShelfInstance query = new ShelfInstance(); List<ShelfInstance> rows = query.getDataSet("SELECT * from SHELFINSTANCE SI join INSTITUTION I on I.INSTITUTIONID=SI.INSTITUTIONID WHERE not exists(select 1 from SHELFINSTANCEPAGES SIP where SIP.SHELFID=SI.SHELFID) ORDER BY I.COUNTRY, I.PLACE, I.NAME"); if (html) { results.append("<h1>Shelf Instances with no pages assigned - potential problems: " + rows.size() + "</h1>"); results.append("<table><tbody><tr><th>ShelfID</th><th>Country</th><th>Place</th><th>Institution Name</th><th>Shelf Number</th><th>FixMeURL</th></tr>"); } else { results.append("<shelfNoPage count=\""+rows.size()+"\">"); } for (ShelfInstance si : rows) { if (html) { results.append("<tr><td>"+si.getShelfInstanceID()+"</td><td>"+si.getCountry()+"</td><td>"+si.getPlace()+"</td><td>"+si.getInstitutionName()+"</td><td>"+si.getShelfNumber()+"</td>"); results.append("<td><a target=\"_blank\" href=\""+baseURL+"/group/vmr-administration/page-and-image/?instID="+si.getInstitutionID()+"&shelfID="+si.getShelfInstanceID()+"\">fix me</a></td>"); results.append("</tr>"); } else { XMLBlock b = XMLBlock.createXMLBlock(si.toFormattedXML()); b.setAttribute("fixMeURL", baseURL+"/group/vmr-administration/page-and-image/?instID="+si.getInstitutionID()+"&shelfID="+si.getShelfInstanceID()); results.append(b.toString()); } } if (html) results.append("</tbody></table>"); else results.append("</shelfNoPage>"); } if (params.pageNoShelf) { ++checkCount; Document query = new Document(); String sql = "SELECT D.*, count(1) TOTALUNASSIGNEDPAGES, min(PAGEID) MINUNASSIGNEDPAGEID from DOCUMENT D join PAGE P on P.DOCUMENTID=D.DOCUMENTID WHERE not exists(select 1 from SHELFINSTANCEPAGES SIP where SIP.DOCUMENTID=P.DOCUMENTID and SIP.PAGEID=P.PAGEID)"; if (params.docIDMin != null) sql += " AND D.DOCUMENTID >= " + params.docIDMin; if (params.docIDMax != null) sql += " AND D.DOCUMENTID <= " + params.docIDMax; List<Document> rows = query.getDataSet(sql + " GROUP BY D.DOCUMENTID ORDER BY TOTALUNASSIGNEDPAGES DESC"); if (html) { results.append("<h1>Documents with pages unassigned to shelf instances - potential problems: " + rows.size() + "</h1>"); results.append("<table><tbody><tr><th>DocID</th><th>Unassigned Page Count</th><th>First Unassigned PageID</th><th>FixMeURL</th></tr>"); } else { results.append("<pageNoShelf count=\""+rows.size()+"\">"); } for (Document d : rows) { if (html) { results.append("<tr><td>"+d.getDocumentID()+"</td><td>"+d.getLongValue("TOTALUNASSIGNEDPAGES")+"</td><td>"+d.getLongValue("MINUNASSIGNEDPAGEID")+"</td>"); results.append("<td><a target=\"_blank\" href=\""+baseURL+"/group/vmr-administration/page-and-image/?docID="+d.getDocumentID()+"\">fix me</a></td>"); results.append("</tr>"); } else { results.append("<doc docID=\""+d.getDocumentID()+"\" totalUnassignedPages=\""+d.getLongValue("TOTALUNASSIGNEDPAGES")+"\" firstUnassignedPageID=\""+d.getLongValue("MINUNASSIGNEDPAGEID")+"\""); results.append(" fixMeURL=\""+baseURL+"/group/vmr-administration/page-and-image/?docID="+d.getDocumentID()+"\""); results.append(" />"); } } if (html) results.append("</tbody></table>"); else results.append("</pageNoShelf>"); } if (params.indexToTranscription) { ++checkCount; Page query = new Page(); String sql = "SELECT D.*, P.*, count(BC.VERSE) VERSECOUNTBC from DOCUMENT D join PAGE P on P.DOCUMENTID=D.DOCUMENTID right join BIBLICALCONTENT BC on BC.DOCUMENTID=P.DOCUMENTID and BC.PAGEID=P.PAGEID WHERE 1=1"; if (params.docIDMin != null) sql += " AND D.DOCUMENTID >= " + params.docIDMin; if (params.docIDMax != null) sql += " AND D.DOCUMENTID <= " + params.docIDMax; sql += " GROUP BY P.DOCUMENTID, P.PAGEID"; List<Page> rows = query.getDataSet(sql); List<Page> problems = new ArrayList<Page>(); for (Page p : rows) { ListKey ic = p.getIndexContent(); List<Integer> icVerses = new ArrayList<Integer>(); ListKey tic = p.getIndexContentFromTranscription(false); List<Integer> ticVerses = new ArrayList<Integer>(); for (ic.setPosition(ic.TOP); ic.popError() == 0; ic.increment()) { int ich = new VerseKey(ic).getHashNumber(); if (ich % 1000 != 0 || ich % 1000000 == 0) { icVerses.add(new VerseKey(ic).getHashNumber()); } } for (tic.setPosition(ic.TOP); tic.popError() == 0; tic.increment()) { ticVerses.add(new VerseKey(tic).getHashNumber()); } Collections.sort(icVerses); Collections.sort(ticVerses); boolean same = Arrays.equals(icVerses.toArray(new Integer[0]), ticVerses.toArray(new Integer[0])); if ((ticVerses.size() != 0 && icVerses.size() != 0) && !same) problems.add(p); } if (html) { results.append("<h1>Pages with transcription verses which do not match index content; problems: " + problems.size() + "</h1>"); results.append("<table><tbody><tr><th>DocID</th><th>PageID</th><th>indexContent</th><th>transcriptionIndexContent</th><th>Check Bibles</th><th>FixMeURL Indexing</th><th>FixMeURL Transcription</th></tr>"); } else { results.append("<indexToTranscription count=\""+problems.size()+"\">"); } for (Page d : problems) { if (html) { results.append("<tr><td>"+d.getDocumentID()+"</td><td>"+d.getPageID()+"</td><td>"+d.getIndexContent().getShortRangeText()+"</td><td>"+d.getIndexContentFromTranscription(false).getShortRangeText()+"</td>"); results.append("<td><a target=\"_blank\" href=\"https://crosswire.org/study/parallelstudy.jsp?del=all&add=CopSahBible2&add=WLC&add=LXX&add=NASB&key="+d.getIndexContent().getShortRangeText()+"#cv\">Bibles</a></td>"); results.append("<td><a target=\"_blank\" href=\""+baseURL+"/indexing?docID="+d.getDocumentID()+"&pageID="+d.getPageID()+"\">fix me: indexing</a></td>"); results.append("<td><a target=\"_blank\" href=\""+baseURL+"/transcribing?docID="+d.getDocumentID()+"&pageID="+d.getPageID()+"\">fix me: transcription</a></td>"); results.append("</tr>"); } else { results.append("<doc docID=\""+d.getDocumentID()+"\" totalUnassignedPages=\""+d.getLongValue("TOTALUNASSIGNEDPAGES")+"\" firstUnassignedPageID=\""+d.getLongValue("MINUNASSIGNEDPAGEID")+"\""); results.append(" fixMeURL=\""+baseURL+"/group/vmr-administration/page-and-image/?docID="+d.getDocumentID()+"\""); results.append(" />"); } } if (html) results.append("</tbody></table>"); else results.append("</indexToTranscription>"); } if (params.appReadingWitnessCount) { int amax = 10; int zzmax = 10; ++checkCount; Segment query = new Segment(); String sql = "select VERSE, CONTEXTDESCRIPTION, (select count(1) from SEGMENTREADING SR LEFT JOIN SEGMENTREADINGWITNESS SRW on SRW.SEGMENTREADINGID=SR.SEGMENTREADINGID where SR.SEGMENTID=S.SEGMENTID and SR.READINGNAME='a' and SRW.PROJECTID="+params.projectID+") ACOUNT, (select count(1) from SEGMENTREADING SR LEFT JOIN SEGMENTREADINGWITNESS SRW on SRW.SEGMENTREADINGID=SR.SEGMENTREADINGID where SR.SEGMENTID=S.SEGMENTID and SR.READINGNAME='zz' and SRW.PROJECTID="+params.projectID+") ZZCOUNT from SEGMENT S left join SEGMENTGROUPSEGMENT SG on SG.SEGMENTID=S.SEGMENTID WHERE SEGMENTGROUPID="+params.projectID+" HAVING ACOUNT <= " + amax + " or ZZCOUNT<= " + zzmax + " ORDER BY VERSE, CONTEXTDESCRIPTION;"; List<Segment> rows = query.getDataSet(sql); Collections.sort(rows); if (html) { results.append("<h1>Segments with 'a' reading witness count <= " + amax + " or 'zz' witness count <= " + zzmax + " - potential problems (or overlapping segments): " + rows.size() + "</h1>"); results.append("<table><tbody><tr><th>Verse</th><th>Context Description</th><th>'a' wit<br/>count</th><th>'zz' wit<br/>count</th><th>FixMeURL</th></tr>"); } else { results.append("<appReadingWitnessCount count=\""+rows.size()+"\">"); } for (Segment d : rows) { if (html) { results.append("<tr><td>"+d.getVerseTextShort()+"</td><td>"+d.getContextDescription()+"</td><td>"+d.getLongValue("ACOUNT")+"</td><td>"+d.getLongValue("ZZCOUNT")+"</td>"); results.append("<td><a target=\"_blank\" href=\""+baseURL+"/web/ecm-matthew/apparatus-manual-editor/?verse="+d.getVerseOSISRef()+"&segment="+d.getContextDescription() +"\">fix me</a></td>"); results.append("</tr>"); } else { results.append("<segment verseHash=\""+d.getVerseHash()+"\" contextDescription=\""+d.getContextDescription()+"\" aCount=\""+d.getLongValue("ACOUNT")+"\" zzCount=\""+d.getLongValue("ZZCOUNT")+"\""); results.append(" fixMeURL=\""+baseURL+"/web/ecm-matthew/apparatus-manual-editor/?verse="+d.getVerseOSISRef()+"&segment="+d.getContextDescription() +"\""); results.append(" />"); } } if (html) results.append("</tbody></table>"); else results.append("</appReadingWitnessCount>"); } if (params.appReadingWitnessZero) { ++checkCount; Segment query = new Segment(); String sql = "select VERSE, CONTEXTDESCRIPTION, READINGNAME, READING, SR.CREATEDATE, SR.USERID, SR.SEGMENTREADINGID, SR.PROJECTID from SEGMENTREADING SR left join SEGMENT S on S.SEGMENTID=SR.SEGMENTID left join SEGMENTGROUPSEGMENT SGS on SGS.SEGMENTID=SR.SEGMENTID where not exists (select 1 from SEGMENTREADINGWITNESS SRW where SRW.SEGMENTID=SR.SEGMENTID and SRW.SEGMENTREADINGID=SR.SEGMENTREADINGID) and VERSE between 2001000000 and 2002000000 and SEGMENTGROUPID=" + params.projectID + " and READINGNAME <> 'a'"; List<Segment> rows = query.getDataSet(sql); Collections.sort(rows); if (html) { results.append("<h1>Segments with non-'a' reading witness count = 0; count: " + rows.size() + "</h1>"); results.append("<table><tbody><tr><th>Verse</th><th>Context Description</th><th>Reading<br/>Label</th><th>Reading<br/>Text</th><th>Create<br/>Date</th><th>User</th><th>ProjectID</th><th>FixMeURL</th><th>ShowMeURL</th><th>DeleteMeURL</th></tr>"); } else { results.append("<appReadingWitnessCount count=\""+rows.size()+"\">"); } for (Segment d : rows) { if (html) { results.append("<tr><td>"+d.getVerseTextShort()+"</td><td>"+d.getContextDescription()+"</td><td>"+d.getStringValue("READINGNAME")+"</td><td>"+d.getStringValue("READING")+"</td><td>"+d.getStringValue("CREATEDATE")+"</td><td>"+d.getStringValue("USERID")+"</td><td>"+d.getStringValue("PROJECTID")+"</td>"); results.append("<td><a target=\"_blank\" href=\""+baseURL+"/web/ecm-matthew/apparatus-master-editor/?indexContent="+d.getVerseOSISRef()+"&segment="+d.getContextDescription() +"\">fix me</a></td>"); results.append("<td><a target=\"_blank\" href=\""+baseURL+"/community/vmr/api/variant/apparatus/get/?indexContent="+d.getVerseOSISRef()+"&includeBaseline=ECM&segmentGroupID="+params.projectID+"&segment="+d.getContextDescription()+"&format=html\">show me</a></td>"); results.append("<td><a target=\"_blank\" href=\"#\" onclick=\"deleteReading(this); return false;\" data-readingid=\""+d.getIntValue("SEGMENTREADINGID")+"\">delete me</a></td>"); results.append("</tr>"); } else { results.append("<segment verseHash=\""+d.getVerseHash()+"\" contextDescription=\""+d.getContextDescription()+"\" aCount=\""+d.getLongValue("ACOUNT")+"\" zzCount=\""+d.getLongValue("ZZCOUNT")+"\""); results.append(" fixMeURL=\""+baseURL+"/web/ecm-matthew/apparatus-manual-editor/?verse="+d.getVerseOSISRef()+"&segment="+d.getContextDescription() +"\""); results.append(" />"); } } if (html) { results.append("</tbody></table>"); results.append("<script>"); results.append("var apiBase = '" + baseURL + "/community/vmr/api/';"); results.append("function deleteReading(e) {"); results.append("var rid = $(e).attr('data-readingid');"); results.append("$.post(apiBase + 'variant/reading/delete/', { segmentReadingID : rid }, function(xml) {"); results.append("var user = $(xml).find('user');"); results.append("$(e).parent().text('done.');"); results.append("});"); results.append("}"); results.append("</script>"); } else results.append("</appReadingWitnessCount>"); } if (checkCount > 0) { if (html) { %> <html> <head> <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script> <style> thead tr { background-color: rgb(240, 240, 240); } thead tr { font-weight: bold; } th, td { border-color: rgb(221, 221, 221); border-style: solid; border-width: 0.8px; padding: 10px 7px 15px 7px; text-align: left; } tbody th { background-color: rgb(240, 240, 240); font-weight: normal; } table { border-collapse: collapse; } </style> </head> <body> <%=results%> </body> </html> <% } else { response.setContentType("text/xml"); %> <integrityCheck checkCount="<%=checkCount%>"> <%=results%> </integrityCheck> <% } return; } } params.format = "html"; Serializer.reportErrors(request, response, out, params, true); %>