<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@ page import="org.crosswire.utils.HTTPUtils" %>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US" lang="en-US">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<%
	// The two texts to compare may arrive as request parameters; each one is
	// re-decoded by reading its ISO-8859-1 bytes back as UTF-8.
	String lhs = request.getParameter("lhs");
	if (lhs != null) lhs = new String(lhs.getBytes("iso8859-1"), "UTF-8");
	String rhs = request.getParameter("rhs");
	if (rhs != null) rhs = new String(rhs.getBytes("iso8859-1"), "UTF-8");
%>
<script type="text/javascript" charset="utf-8" src="jquery-1.6.4.min.js"></script>
<script type="text/javascript">
// <!-- charset=utf-8" src="reconciler.js"> -->

var myURL='.';
var file1 = null;
var file2 = null;
var file1String;
var file2String;

// markup recognised in the transcription files
var notesRegex = /\{(.*?)\}/g;
//vat par check removed from regex
var structureRegex = /[\||<](.*?)[\||>]/g;
var bookRegex = /\|\s*?B\s*?(\d+?)\|/g;
var chapterRegex = /\|\s*?K\s*?(\d+?)\|/g;
var verseRegex = /\|\s*?V\s*?(\d+?)\|/g;
var folioRegex = /^\|F\s*?(\d+?)([rvab])\S{0,1}\||^\|F([rvab])\s*?(\d+?)\S{0,1}\|/i;
var pageRegex = /^\|P\s*?(\d+?)\|/i;
var columnRegex = /^\|C\s*?(\d+?)\|/i;
var linebreakRegex = /^\|L\s*?\d*\|/i;

// colours used to label the two inputs in the results summary
// (the trailing ';' is part of the value: they are concatenated into a style attribute)
var file1Background = '#990033;';
var file1TextColour = 'white;';
var file2Background = '#009933;';
var file2TextColour = 'white;';

// 'show' or 'hide': whether verses whose checkbox is ticked are currently displayed
var checkedState = 'show';
// verse key -> html (comment box, checkbox and diff) for every verse that differs
var results = {};

// Escape text so it can be placed inside html markup or a quoted attribute.
function escape_html(text){
	return String(text)
		.replace(/&/g, '&amp;')
		.replace(/</g, '&lt;')
		.replace(/>/g, '&gt;')
		.replace(/"/g, '&quot;')
		.replace(/'/g, '&#39;');
}

// Label to show for one of the two inputs.
// input: file, a File-like object with a name, a plain string label, or null
//        fallback, label used when nothing better is available
function display_name(file, fallback){
	if (file && typeof file.name === 'string'){
		return file.name;
	}
	if (typeof file === 'string'){
		return file;
	}
	return fallback;
}

// Forget a loaded file and clear its entry in the file list.
// input: file, 'file1' or 'file2'
function deletefile(file){
	document.getElementById(file + '_details').innerHTML = '';
	if (file === 'file1'){
		file1 = null;
	}
	if (file === 'file2'){
		file2 = null;
	}
}

// Store the file chosen in the 'file' input in the first free slot (file1, then file2)
// and show its name with a delete icon. Alerts when both slots are taken.
function uploadFile(){
	var f = document.getElementById('file').files[0];
	var slot;
	if (file1 === null){
		slot = 'file1';
	} else if (file2 === null){
		slot = 'file2';
	} else {
		alert('two files already loaded');
		return;
	}
	if (!f){
		// nothing was selected, so there is nothing to load
		return;
	}
	if (slot === 'file1'){
		file1 = f;
	} else {
		file2 = f;
	}
	// the name is html-escaped (not percent-encoded) so it displays as the user sees it on disk
	document.getElementById(slot + '_details').innerHTML = escape_html(f.name)
		+ '<img class="delete_logo" onclick="javascript:deletefile(\'' + slot + '\');" alt="delete" title="delete this file" src="'+myURL+'/delete.png"/>';
}

// Split a transcription into lines so that every {comment} and every |structure| marker
// (or <...> marker) sits on a line of its own.
// input: text, the raw transcription
// returns: array of lines
function format_text(text){
	var lines, line, i;
	text = text.replace(/(\r\n|\n|\r)/gm, ' ');
	// text = text.replace(/\u006F/g, '\u03BF');//fix for dodgy omicrons
	text = text.replace(notesRegex, '\n{$1}\n');
	lines = text.split('\n');
	for (i=0; i<lines.length; i++){
		line = lines[i];
		if (line.length > 0 && line.search(/\{Status/) != -1){
			// status comments are dropped entirely
			lines[i] = '';
		} else if (line.length > 0 && line[0] === '{'){
			// neutralise characters inside comments that would otherwise be read as markup
			line = line.replace(/</g, '-');
			line = line.replace(/\|/g, '-');
			line = line.replace(/>/g, '-');
			line = line.replace(/\[/g, '(');
			line = line.replace(/\]/g, ')');
			line = line.replace(/\u03BF/g, '\u006F'); // put comment os back to Latin not Greek
			lines[i] = line;
		}
	}
	text = lines.join('\n');
	text = text.replace(structureRegex, '\n|$1|\n');
	return text.split('\n');
}

// Strip leading/trailing whitespace and collapse runs of spaces to a single space.
function trim(s) {
	s = s.replace(/(^\s*)|(\s*$)/gi,"");
	s = s.replace(/[ ]{2,}/gi," ");
	s = s.replace(/\n /,"\n");
	return s;
}

// Normalise the text of one verse before it is compared.
// input: text, the verse text; the ignore_* flags select which features are removed
// returns: the lower-cased, normalised text
function clean_verse_text(text, ignore_layout, ignore_comments, ignore_tags, ignore_final_nu, ignore_punctuation, ignore_om, ignore_comm, ignore_lect){
	text = text.toLowerCase();
	text = text.replace(/\u0305/g, '~');//get rid of combining nomsac lines
	text = text.replace(/\u0323/g, '_');//get rid of underdots
	if (ignore_final_nu == true){
		text = text.replace(/\u0304/g, '\u03BD');//final nu replacement
	}
	if (ignore_om){
		text = text.replace(/\{\s*om\s*\}/g, '');
	}
	if (ignore_comm){
		text = text.replace(/\{\s*comm\s*\}/g, '');
	}
	if (ignore_lect){
		text = text.replace(/\{\s*lect\s*\}/g, '');
	}
	if (ignore_punctuation){
		text = text.replace(/\u0387/g, ''); //ano teleia
		text = text.replace(/\u00B7/g, '');//middle dot
		text = text.replace(/\u037E/g, '');//greek question mark
		text = text.replace(/\./g, '');
		text = text.replace(/;/g, '');
		text = text.replace(/,/g, '');
		text = text.replace(/:/g, '');
	}
	if (ignore_layout == true){
		// repeat until none are left: removing one '= ' can bring another one together
		while (text.indexOf('= ') != -1){
			text = text.replace('= ', '');
		}
	}
	if (ignore_comments == true){
		text = text.replace(/\{.+?\}/g, '');
	}
	if (ignore_tags == true){
		text = text.replace(/\[.+?\]/g, '');
	}
	return text;
}

// Build a dictionary of cleaned verse text keyed by 'book_chapter:verse'.
// Text that appears before the first verse marker is stored under 'preVerse1'.
// input: text, the raw transcription; the ignore_* flags are passed on to clean_verse_text
// returns: object mapping verse key to cleaned text
function extract_verse_text(text, ignore_layout, ignore_comments, ignore_tags, ignore_final_nu, ignore_punctuation, ignore_om, ignore_comm, ignore_lect){
	var lines = format_text(text);
	var verse_dict = {};
	var b = null;
	var k = null;
	var v = null;
	var working = [];
	var key = 'preVerse1';
	var line, i;

	// clean the text collected so far with the full set of options
	function clean_working(){
		return trim(clean_verse_text(working.join(' '), ignore_layout, ignore_comments, ignore_tags, ignore_final_nu, ignore_punctuation, ignore_om, ignore_comm, ignore_lect));
	}

	for (i=0; i<lines.length; i+=1){
		line = lines[i];
		if (line.search(bookRegex) !== -1){
			b = line.replace(bookRegex, '$1');
		} else if (line.search(chapterRegex) !== -1){
			k = line.replace(chapterRegex, '$1');
			if (k.length === 1){
				k = '0' + k;
			}
		} else if (line.search(verseRegex) !== -1){
			// a new verse starts: store what was collected for the previous key
			// (the first occurrence of a key wins)
			if (working.length > 0 && verse_dict[key] === undefined){
				verse_dict[key] = clean_working();
			}
			v = line.replace(verseRegex, '$1');
			if (v.length == 1){
				v = '0' + v;
			}
			key = b + '_' + k + ':' + v;
			working = [];
		} else if (ignore_layout == true && (line.search(linebreakRegex) != -1 || line.search(columnRegex) != -1 || line.search(pageRegex) != -1 || line.search(folioRegex) != -1)){
			//squelch
		} else {
			working.push(trim(line));
		}
	}
	if (working.length > 0){
		// the last verse gets the same cleaning options as every other verse
		verse_dict[key] = clean_working();
	}
	//console.log(verse_dict);
	return verse_dict;
}

// List the structure markers (lines starting with '|') of a transcription in order.
// With ignore_layout === true only book, chapter and verse markers are kept.
function extract_structure(text, ignore_layout){
	var lines = format_text(text);
	var structure_list = [];
	var line, i, isReference;
	for (i=0; i<lines.length; i++){
		line = lines[i];
		if (line.length > 0 && line[0] === '|'){
			if (ignore_layout === true){
				isReference = line.search(bookRegex) !== -1
					|| line.search(chapterRegex) !== -1
					|| line.search(verseRegex) !== -1;
				if (isReference){
					structure_list.push(trim(line));
				}
			} else {
				structure_list.push(trim(line));
			}
		}
	}
	return structure_list;
}

// Placeholder: formats the text but always returns an empty list.
function textToList(text, ignore_structure){
	var lines = format_text(text);
	return [];
}

// Own enumerable property names of obj.
function getKeys(obj){
	var keys = [];
	for(var key in obj){
		if(obj.hasOwnProperty(key)) {
			keys.push(key);
		}
	}
	return keys;
}

// Sort the array in place and remove adjacent duplicates; returns the same array.
function unique(a){
	a.sort();
	for(var i = 1; i < a.length; ){
		if(a[i-1] == a[i]){
			a.splice(i, 1);
		} else {
			i+=1;
		}
	}
	return a;
}

// returns: '' when the strings are identical, otherwise the html diff of the two
function compareStrings(s1, s2){
	if (s1 === s2){
		return '';
	}
	return WDiffString(s1, s2);//diffString(s1, s2);//
}

// Hide or show the verses whose 'marked' checkbox is ticked.
// input: id, id of the link whose text carries the current mode
function toggle_checked(id){
	var link = document.getElementById(id);
	var hiding = (link.innerHTML == 'Hide checked verses');
	var checkboxes = document.getElementsByName('marked');
	for (var i=0; i<checkboxes.length; i++){
		if (checkboxes[i].checked){
			checkboxes[i].parentNode.style.display = hiding ? 'none' : 'block';
		}
	}
	link.innerHTML = hiding ? 'Show hidden verses' : 'Hide checked verses';
	checkedState = hiding ? 'hide' : 'show';
}

// Show or hide a results panel and swap 'Show'/'Hide' in the text of its link.
// Only acts when the panel's inline display is exactly 'none' or 'block'.
function toggle_results(id, link_id){
	var panel = document.getElementById(id);
	var link = document.getElementById(link_id);
	if (panel.style.display == 'none'){
		panel.style.display = 'block';
		link.innerHTML = link.innerHTML.replace('Show', 'Hide');
	} else if (panel.style.display == 'block'){
		panel.style.display = 'none';
		link.innerHTML = link.innerHTML.replace('Hide', 'Show');
	}
}

// Compare two transcriptions and fill the summary, structure and text result panels.
// input: s1, s2, the raw texts; the ignore_* flags select what is disregarded
// changes: results, and the content of the results_* / *_results elements
function compareFiles(s1, s2, ignore_structure, ignore_comments, ignore_tags, ignore_final_nu, ignore_punctuation, ignore_om, ignore_comm, ignore_lect){
	var summary = document.getElementById('results_summary');
	var structureResults = document.getElementById('structure_results');
	var structureToggle = document.getElementById('structure_results_toggle');
	var textResults = document.getElementById('text_results');
	var textToggle = document.getElementById('text_results_toggle');
	var i, keys, result, compString1, compString2;

	// start from a clean slate so nothing from an earlier comparison leaks into this one
	results = {};
	checkedState = 'show';
	structureResults.innerHTML = '';
	structureToggle.innerHTML = '';
	textResults.innerHTML = '';
	textToggle.innerHTML = '';
	document.getElementById('results_container').style.display = 'block';

	var list1 = extract_structure(s1, ignore_structure);
	var list2 = extract_structure(s2, ignore_structure);
	var outlineResult = compareStrings(list1.join(' '), list2.join(' '));

	// file1/file2 are null when the texts do not come from uploaded files
	var summaryHtml = 'Comparing <span style="background: ' + file1Background + ' color: ' + file1TextColour + '"> '
		+ escape_html(display_name(file1, 'text 1'))
		+ ' </span> with <span style="background: ' + file2Background + ' color: ' + file2TextColour + '"> '
		+ escape_html(display_name(file2, 'text 2'))
		+ ' </span><br/>';

	if (outlineResult == ''){
		summaryHtml += 'Structures identical.<br/>';
	} else {
		summaryHtml += 'Structures are different.<br/>';
		// put each chapter marker on a new line
		structureResults.innerHTML = outlineResult.replace(/\|K/g, '<br/>|K');
		structureToggle.innerHTML = 'Show Structure Results';
	}

	var dict1 = extract_verse_text(s1, ignore_structure, ignore_comments, ignore_tags, ignore_final_nu, ignore_punctuation, ignore_om, ignore_comm, ignore_lect);
	var dict2 = extract_verse_text(s2, ignore_structure, ignore_comments, ignore_tags, ignore_final_nu, ignore_punctuation, ignore_om, ignore_comm, ignore_lect);

	// compare every verse present in either text; a missing verse counts as empty
	keys = unique(getKeys(dict1).concat(getKeys(dict2)));
	for (i=0; i<keys.length; i++){
		compString1 = dict1.hasOwnProperty(keys[i]) ? dict1[keys[i]] : '';
		compString2 = dict2.hasOwnProperty(keys[i]) ? dict2[keys[i]] : '';
		result = compareStrings(compString1, compString2);
		if (result != ''){
			results[keys[i]] = '<input size="100" type="text" name="comments" onchange="update(this);"/><br/><input name="marked" type="checkbox" onchange="update(this)"/>' + result;
		}
	}

	var output = '<input onchange="javascript:toggleAll(this);" type="checkbox"><label>select/deselect all</label><a id="toggle_checked" href="javascript:toggle_checked(\'toggle_checked\')">Hide checked verses</a><br/>';
	keys = getKeys(results);
	if (keys.length == 0){
		summaryHtml += 'Biblical text identical.<br/>';
	} else {
		for (i=0; i<keys.length; i++){
			output = output + '<div class="verse_comp">' + keys[i] + ' ' + results[keys[i]] + '</div>';
		}
		summaryHtml += keys.length + ' Biblical verses have differences.<br/>';
		textResults.innerHTML = output;
		textToggle.innerHTML = 'Show Text Results';
	}
	summary.innerHTML = summaryHtml;
}

// Parse a string as xml, using the MSXML ActiveX parser where it exists and DOMParser otherwise.
function parse_XML(xml){
	var xmlDoc;
	try {
		xmlDoc = new ActiveXObject("Microsoft.XMLDOM");
		xmlDoc.async = "false";
		xmlDoc.loadXML(xml);
	} catch(e) {
		xmlDoc = new DOMParser().parseFromString(xml, "text/xml");
	}
	return xmlDoc;
}

// True when parsing failed: there is no root element, or the parser reported its error
// as a <parsererror> element (which may be the root or nested inside the document).
function has_parse_error(xmlDoc){
	return !xmlDoc
		|| !xmlDoc.documentElement
		|| xmlDoc.documentElement.nodeName == "parsererror"
		|| xmlDoc.getElementsByTagName('parsererror').length > 0;
}

//try to get a valid dom by correcting once for a known bug in the diff.js library
// returns: the parsed document, or false when the string cannot be parsed
function get_dom(string){
	var xmlDoc = parse_XML(string);
	if (has_parse_error(xmlDoc)){
		// retry with the missing closing span added
		xmlDoc = parse_XML(string.replace(/<\/container>/, '</span></container>'));
		if (has_parse_error(xmlDoc)){
			return false;
		}
	}
	return xmlDoc;
}

//given a difference node find the full word that contains it.
Stop at space and ] or [ function get_word_limits(startNode) { //remove outer if statements for a word of context on one side of a full word change var following_text = ''; var preceding_text = ''; var following, previous; if ((startNode.childNodes[0].nodeValue[0] != ' ' && startNode.childNodes[0].nodeValue[0] != '[') || ((startNode.childNodes[0].nodeValue[0] == ' ' || startNode.childNodes[0].nodeValue[0] == '[') && startNode.childNodes[0].nodeValue.length == 1)){ if (startNode.previousSibling != null && startNode.previousSibling.nodeType === 3){ previous = startNode.previousSibling; if (previous.nodeValue[previous.nodeValue.length-1] !== ' ' && previous.nodeValue[previous.nodeValue.length-1] !== ']'){ preceding_text = previous.nodeValue.split(' '); preceding_text = preceding_text[preceding_text.length-1]; preceding_text = preceding_text.substring(preceding_text.lastIndexOf(']')+1); //console.log(preceding_text) } } } if ((startNode.childNodes[0].nodeValue[startNode.childNodes[0].nodeValue.length-1] != ' ' && startNode.childNodes[0].nodeValue[startNode.childNodes[0].nodeValue.length-1] != ']') || ((startNode.childNodes[0].nodeValue[startNode.childNodes[0].nodeValue.length-1] == ' ' || startNode.childNodes[0].nodeValue[startNode.childNodes[0].nodeValue.length-1] == ']') && startNode.childNodes[0].nodeValue.length == 1)){ if (startNode.nextSibling != null && startNode.nextSibling.nodeType === 3){ following = startNode.nextSibling; //console.log(following) if (following.nodeValue[0] !== ' ' && following.nodeValue[0] !== '['){ following_text = following.nodeValue.split(' ')[0]; if (following_text.indexOf('[') !== -1){ following_text = following_text.substring(0, following_text.indexOf('[')); } //console.log(following_text) } } } return [preceding_text, following_text]; } //create a list of differences from this verse //input: the html string of a verse from the text results view //output: a 2 item list per difference all wrapped up in another list function 
process_diffs(string) { // console.log(string) var differences = []; var i, xmlDoc, spans, spanType, surrounding_text, last_span, first_span, pre_string, post_string; var text1_string = ''; var text2_string = ''; xmlDoc = get_dom(string); if (xmlDoc == false){ console.log(parse_XML(string)); } else { spans = xmlDoc.getElementsByTagName('span'); for (i=0; i<spans.length; i+=1) { //treat as stand alone variant IF: // the nextSibling is not a span // OR // the nextSibling is a span AND the current span ends with a space OR the next span starts with a space // OR // there is no nextSibling if ((spans[i].nextSibling != null && spans[i].nextSibling.nodeName !== 'span') || (spans[i].nextSibling != null && spans[i].nextSibling.nodeName == 'span' && (spans[i].childNodes[0].nodeValue[spans[i].childNodes[0].nodeValue.length-1] == ' ' || spans[i].nextSibling.childNodes[0].nodeValue[0] == ' ')) || spans[i].nextSibling == null){ //examples like GK 1:18 could be better spanType = spans[i].getAttribute('class'); if (spanType === 'wDiffHtmlDelete') { surrounding_text = get_word_limits(spans[i]); if (surrounding_text[0] !== '' || surrounding_text[1] !== ''){ differences.push([surrounding_text[0] + spans[i].childNodes[0].nodeValue + surrounding_text[1], surrounding_text[0] + surrounding_text[1]]); } else { differences.push([spans[i].childNodes[0].nodeValue, '[BLANK]']); } } else { if (spanType === 'wDiffHtmlInsert') { surrounding_text = get_word_limits(spans[i]); if (surrounding_text[0] !== '' || surrounding_text[1] !== ''){ differences.push([surrounding_text[0] + surrounding_text[1], surrounding_text[0] + spans[i].childNodes[0].nodeValue + surrounding_text[1]]); } else { differences.push(['[BLANK]', spans[i].childNodes[0].nodeValue]); } } } } else { text1_string = ''; text2_string = ''; first_span = spans[i]; last_span = null; spanType = spans[i].getAttribute('class'); if (spanType === 'wDiffHtmlDelete') { text1_string += spans[i].childNodes[0].nodeValue; } else { if (spanType === 
'wDiffHtmlInsert') { text2_string += spans[i].childNodes[0].nodeValue; } } while (spans[i].nextSibling !== null && spans[i].nextSibling.nodeName === 'span'){ spanType = spans[i].nextSibling.getAttribute('class'); if (spanType === 'wDiffHtmlDelete') { text1_string += spans[i].nextSibling.childNodes[0].nodeValue; } else { if (spanType === 'wDiffHtmlInsert') { text2_string += spans[i].nextSibling.childNodes[0].nodeValue; } } last_span = spans[i].nextSibling; i+=1; } pre_string = get_word_limits(first_span); post_string = get_word_limits(last_span); if (text1_string === ''){ text1_string ='[BLANK]'; } if (text2_string === ''){ text2_string = '[BLANK]'; } differences.push([pre_string[0] + text1_string + post_string[1], pre_string[0] + text2_string + post_string[1]]); //spaces in both strings need to be taken into account for pre/post string } } } return differences; } function saveText(){ newwindow1=open('','name','height=200,width=150,scrollbars=yes'); var tmp = newwindow1.document; var keys = getKeys(results); for (var i=0; i<keys.length; i+=1){ tmp.write(keys[i] + '<br/>'); string = results[keys[i]].replace(/ /g, ' '); diffs = process_diffs('<container>' + string + '</container>'); //console.log(diffs); for (var j=0; j<diffs.length; j+=1) { tmp.write(diffs[j][0]); tmp.write(' ] '); tmp.write(diffs[j][1]); tmp.write('<br/>'); } tmp.write('<br/>'); } tmp.close(); } function getReconciliationString() { var textString = escape(document.getElementById('results_summary').innerHTML) + '|||' + escape(document.getElementById('structure_results').innerHTML) + '|||' + escape(document.getElementById('text_results').innerHTML); return textString; } function saveReconciliation(){ //option 3 - put serialisation in popup window for users to copy and paste into a file var textString = getReconciliationString(); console.log(textString); newwindow2=open('','name','height=200,width=150,scrollbars=yes'); var tmp = newwindow2.document; tmp.write(textString); tmp.close(); } function 
toggleAll(elem){ boxes = document.getElementsByName('marked'); if (elem.checked){ elem.setAttribute('checked', 'checked'); for (var i=0; i<boxes.length; i+=1){ boxes[i].checked = true; boxes[i].setAttribute('checked', 'checked'); update(boxes[i]); } } else { elem.removeAttribute('checked'); for (var i=0; i<boxes.length; i+=1){ boxes[i].checked = false; boxes[i].removeAttribute('checked'); update(boxes[i]); } } } function update(elem){ if (elem.type === 'checkbox'){ if (elem.checked){ elem.setAttribute('checked', 'checked'); if (checkedState === 'hide'){ elem.parentNode.style.display = 'none'; } } else { elem.removeAttribute('checked'); if (checkedState === 'hide'){ elem.parentNode.style.display = 'block'; } } } else { elem.setAttribute('value', elem.value); } } function uploadReconciliation (){ f = document.getElementById('reconciliation').files[0]; reader = new FileReader(); reader.onload = ( function (){ return function (evt){ var fullString = evt.target.result; uploadReconciliationData(fullString); }; }()); reader.readAsText(f); return f.name; } function uploadReconciliationData(fullString) { var body = document.getElementsByTagName('body')[0]; body.className = 'waiting'; document.getElementById('results_container').style.display = 'none'; document.getElementById('text_results').style.display = 'none'; document.getElementById('structure_results').style.display = 'none'; document.getElementById('text_results_toggle').innerHTML = ''; document.getElementById('structure_results_toggle').innerHTML = ''; var stringList = fullString.split('|||'); var summaryString = stringList[0]; var structureString = stringList[1]; var textString = stringList[2]; document.getElementById('results_summary').innerHTML = unescape(summaryString); document.getElementById('structure_results').innerHTML = unescape(structureString); document.getElementById('text_results').innerHTML = unescape(textString); document.getElementById('results_container').style.display = 'block'; 
document.getElementById('structure_results_toggle').innerHTML = 'Show Structure Results'; document.getElementById('text_results_toggle').innerHTML = 'Show Text Results'; body.className = 'active'; document.getElementById('file1_details').innerHTML = ''; document.getElementById('file2_details').innerHTML = ''; file1 = file2 = null; return; } function compare() { /* if (file1 == null || file2 == null){ alert('Please select two files before continuing.'); } */ var body = document.getElementsByTagName('body')[0]; body.className = 'waiting'; var ignore_structure = document.getElementById('layout').checked; var ignore_comments = document.getElementById('all_comments').checked; var ignore_tags = document.getElementById('tags').checked; var ignore_final_nu = document.getElementById('final_nu').checked; var ignore_punctuation = document.getElementById('punctuation').checked; var ignore_om = document.getElementById('comment_om').checked; var ignore_comm = document.getElementById('comment_comm').checked; var ignore_lect = document.getElementById('comment_lect').checked; if (true) { // if (file1 == '[buffer 1]' && file2 == '[buffer 2]') { var lhs = decodeURIComponent($('#lhsdata').val()); var rhs = decodeURIComponent($('#rhsdata').val()); compareFiles(lhs, rhs, ignore_structure, ignore_comments, ignore_tags, ignore_final_nu, ignore_punctuation, ignore_om, ignore_comm, ignore_lect); body.className = 'active'; } else { readFiles(function(buffer1, buffer2) { compareFiles(buffer1, buffer2, ignore_structure, ignore_comments, ignore_tags, ignore_final_nu, ignore_punctuation, ignore_om, ignore_comm, ignore_lect); body.className = 'active'; }); } } function readFiles(callback) { var reader1 = new FileReader(); var reader2 = new FileReader(); document.getElementById('results_container').style.display = 'none'; document.getElementById('text_results').style.display = 'none'; document.getElementById('structure_results').style.display = 'none'; 
document.getElementById('text_results_toggle').innerHTML = ''; document.getElementById('structure_results_toggle').innerHTML = ''; reader2.onload = (function () { return function (evt) { file2String = evt.target.result; callback(file1String, file2String); }; }(file2String)); reader1.onload = (function () { return function (evt) { file1String = evt.target.result; reader2.readAsText(file2); }; }(file1String)); reader1.readAsText(file1); } /* Name: wDiff.js Version: 0.9.9 (October 10, 2010) Info: http://en.wikipedia.org/wiki/User:Cacycle/diff Code: http://en.wikipedia.org/wiki/User:Cacycle/diff.js JavaScript diff algorithm by [[en:User:Cacycle]] (http://en.wikipedia.org/wiki/User_talk:Cacycle). Outputs html/css-formatted new text with highlighted deletions, inserts, and block moves. For newline highlighting the following style rules have to be added to the document: .wDiffParagraph:before { content: "¶"; }; The program uses cross-browser code and should work with all modern browsers. It has been tested with: * Mozilla Firefox 1.5.0.1 * Mozilla SeaMonkey 1.0 * Opera 8.53 * Internet Explorer 6.0.2900.2180 * Internet Explorer 7.0.5730.11 This program is also compatible with Greasemonkey An implementation of the word-based algorithm from: Communications of the ACM 21(4):264 (1978) http://doi.acm.org/10.1145/359460.359467 With the following additional feature: * Word types have been optimized for MediaWiki source texts * Additional post-pass 5 code for resolving islands caused by adding two common words at the end of sequences of common words * Additional detection of block borders and color coding of moved blocks and their original position * Optional "intelligent" omission of unchanged parts from the output This code is used by the MediaWiki in-browser text editors [[en:User:Cacycle/editor]] and [[en:User:Cacycle/wikEd]] and the enhanced diff view tool wikEdDiff [[en:User:Cacycle/wikEd]]. 
Usage: var htmlText = WDiffString(oldText, newText); This code has been released into the public domain. Datastructures (abbreviations from publication): text: an object that holds all text related datastructures .newWords: consecutive words of the new text (N) .oldWords: consecutive words of the old text (O) .newToOld: array pointing to corresponding word number in old text (NA) .oldToNew: array pointing to corresponding word number in new text (OA) .message: output message for testing purposes symbol table: symbols[word]: associative array (object) of detected words for passes 1 - 3, points to symbol[i] symbol[i]: array of objects that hold word counters and pointers: .newCtr: new word occurences counter (NC) .oldCtr: old word occurences counter (OC) .toNew: first word occurrence in new text, points to text.newWords[i] .toOld: last word occurrence in old text, points to text.oldWords[i] block: an object that holds block move information blocks indexed after new text: .newStart: new text word number of start of this block .newLength: element number of this block including non-words .newWords: true word number of this block .newNumber: corresponding block index in old text .newBlock: moved-block-number of a block that has been moved here .newLeft: moved-block-number of a block that has been moved from this border leftwards .newRight: moved-block-number of a block that has been moved from this border rightwards .newLeftIndex: index number of a block that has been moved from this border leftwards .newRightIndex: index number of a block that has been moved from this border rightwards blocks indexed after old text: .oldStart: word number of start of this block .oldToNew: corresponding new text word number of start .oldLength: element number of this block including non-words .oldWords: true word number of this block */ // css for change indicators if (typeof(wDiffStyleDelete) == 'undefined') { wDiffStyleDelete = 'font-weight: normal; text-decoration: none; color: #fff; 
background-color: #990033;'; } if (typeof(wDiffStyleInsert) == 'undefined') { wDiffStyleInsert = 'font-weight: normal; text-decoration: none; color: #fff; background-color: #009933;'; } if (typeof(wDiffStyleMoved) == 'undefined') { wDiffStyleMoved = 'font-weight: bold; color: #000; vertical-align: text-bottom; font-size: xx-small; padding: 0; border: solid 1px;'; } if (typeof(wDiffStyleBlock) == 'undefined') { wDiffStyleBlock = [ 'color: #000; background-color: #ffff80;', 'color: #000; background-color: #c0ffff;', 'color: #000; background-color: #ffd0f0;', 'color: #000; background-color: #ffe080;', 'color: #000; background-color: #aaddff;', 'color: #000; background-color: #ddaaff;', 'color: #000; background-color: #ffbbbb;', 'color: #000; background-color: #d8ffa0;', 'color: #000; background-color: #d0d0d0;' ]; } // html for change indicators, {number} is replaced by the block number // {block} is replaced by the block style, class and html comments are important for shortening the output if (typeof(wDiffHtmlMovedRight) == 'undefined') { wDiffHtmlMovedRight = '<input class="wDiffHtmlMovedRight" type="button" value=">" style="' + wDiffStyleMoved + ' {block}"><!--wDiffHtmlMovedRight-->'; } if (typeof(wDiffHtmlMovedLeft) == 'undefined') { wDiffHtmlMovedLeft = '<input class="wDiffHtmlMovedLeft" type="button" value="<" style="' + wDiffStyleMoved + ' {block}"><!--wDiffHtmlMovedLeft-->'; } if (typeof(wDiffHtmlBlockStart) == 'undefined') { wDiffHtmlBlockStart = '<span class="wDiffHtmlBlock" style="{block}">'; } if (typeof(wDiffHtmlBlockEnd) == 'undefined') { wDiffHtmlBlockEnd = '</span><!--wDiffHtmlBlock-->'; } if (typeof(wDiffHtmlDeleteStart) == 'undefined') { wDiffHtmlDeleteStart = '<span class="wDiffHtmlDelete" style="' + wDiffStyleDelete + '">'; } if (typeof(wDiffHtmlDeleteEnd) == 'undefined') { wDiffHtmlDeleteEnd = '</span>'; } if (typeof(wDiffHtmlInsertStart) == 'undefined') { wDiffHtmlInsertStart = '<span class="wDiffHtmlInsert" style="' + wDiffStyleInsert + '">'; } 
if (typeof(wDiffHtmlInsertEnd) == 'undefined') { wDiffHtmlInsertEnd = '</span>'; }

// minimal number of real words for a moved block (0 for always displaying block move indicators)
if (typeof(wDiffBlockMinLength) == 'undefined') { wDiffBlockMinLength = 3; }

// exclude identical sequence starts and endings from change marking
if (typeof(wDiffWordDiff) == 'undefined') { wDiffWordDiff = false; }

// enable recursive diff to resolve problematic sequences
if (typeof(wDiffRecursiveDiff) == 'undefined') { wDiffRecursiveDiff = true; }

// enable block move display
if (typeof(wDiffShowBlockMoves) == 'undefined') { wDiffShowBlockMoves = true; }

// remove unchanged parts from final output

// characters before diff tag to search for previous heading, paragraph, line break, cut characters
if (typeof(wDiffHeadingBefore) == 'undefined') { wDiffHeadingBefore = 1500; }
if (typeof(wDiffParagraphBefore) == 'undefined') { wDiffParagraphBefore = 1500; }
if (typeof(wDiffLineBeforeMax) == 'undefined') { wDiffLineBeforeMax = 1000; }
if (typeof(wDiffLineBeforeMin) == 'undefined') { wDiffLineBeforeMin = 500; }
if (typeof(wDiffBlankBeforeMax) == 'undefined') { wDiffBlankBeforeMax = 1000; }
if (typeof(wDiffBlankBeforeMin) == 'undefined') { wDiffBlankBeforeMin = 500; }
if (typeof(wDiffCharsBefore) == 'undefined') { wDiffCharsBefore = 500; }

// characters after diff tag to search for next heading, paragraph, line break, or characters
if (typeof(wDiffHeadingAfter) == 'undefined') { wDiffHeadingAfter = 1500; }
if (typeof(wDiffParagraphAfter) == 'undefined') { wDiffParagraphAfter = 1500; }
if (typeof(wDiffLineAfterMax) == 'undefined') { wDiffLineAfterMax = 1000; }
if (typeof(wDiffLineAfterMin) == 'undefined') { wDiffLineAfterMin = 500; }
if (typeof(wDiffBlankAfterMax) == 'undefined') { wDiffBlankAfterMax = 1000; }
if (typeof(wDiffBlankAfterMin) == 'undefined') { wDiffBlankAfterMin = 500; }
if (typeof(wDiffCharsAfter) == 'undefined') { wDiffCharsAfter = 500; }

// maximal fragment distance to join close fragments
if (typeof(wDiffFragmentJoin) == 'undefined') { wDiffFragmentJoin = 1000; }
if (typeof(wDiffOmittedChars) == 'undefined') { wDiffOmittedChars = '…'; }
if (typeof(wDiffOmittedLines) == 'undefined') { wDiffOmittedLines = '<hr style="height: 2px; margin: 1em 10%;">'; }
if (typeof(wDiffNoChange) == 'undefined') { wDiffNoChange = '<hr style="height: 2px; margin: 1em 20%;">'; }

// compatibility fix for old name of main function
StringDiff = WDiffString;

// WDiffString: main program
// input: oldText, newText, strings containing the texts (null or undefined is treated as empty text)
// returns: html diff
function WDiffString(oldText, newText) {

	// a missing text counts as an empty one; this has to happen before any string method
	// is called on it, otherwise the "deleted" traps below could never be reached
	if (oldText == null) {
		oldText = '';
	}
	if (newText == null) {
		newText = '';
	}

	// IE / Mac fix
	oldText = oldText.replace(/\r\n?/g, '\n');
	newText = newText.replace(/\r\n?/g, '\n');

	var text = {};
	text.newWords = [];
	text.oldWords = [];
	text.newToOld = [];
	text.oldToNew = [];
	text.message = '';
	var block = {};
	var outText = '';

	// trap trivial changes: no change
	if (oldText == newText) {
		outText = WDiffEscape(newText);
		outText = WDiffHtmlFormat(outText);
		return outText;
	}

	// trap trivial changes: old text deleted
	if (oldText.length == 0) {
		outText = WDiffEscape(newText);
		outText = WDiffHtmlFormat(outText);
		return wDiffHtmlInsertStart + outText + wDiffHtmlInsertEnd;
	}

	// trap trivial changes: new text deleted
	if (newText.length == 0) {
		outText = WDiffEscape(oldText);
		outText = WDiffHtmlFormat(outText);
		return wDiffHtmlDeleteStart + outText + wDiffHtmlDeleteEnd;
	}

	// split new and old text into words
	WDiffSplitText(oldText, newText, text);

	// calculate diff information
	WDiffText(text);

	//detect block borders and moved blocks
	WDiffDetectBlocks(text, block);

	// process diff data into formatted html text
	outText = WDiffToHtml(text, block);

	// IE fix
	// NOTE(review): as written the replacement equals the match, so this line changes nothing;
	// presumably the replacement once contained a non-breaking space - confirm against the library
	outText = outText.replace(/> ( *)</g, '> $1<');

	return outText;
}

//
//
//// WDiffSplitText: split new and old text into words
//// input: oldText, 
// newText, strings containing the texts
//// changes: text.newWords and text.oldWords, arrays containing the texts in arrays of words
//
// WDiffSplitText tokenises both texts with one shared pattern and appends the
// tokens to text.oldWords / text.newWords (the arrays must already exist).

function WDiffSplitText(oldText, newText, text) {

  // convert strange spaces
  oldText = oldText.replace(/[\t\u000b\u00a0\u2028\u2029]+/g, ' ');
  newText = newText.replace(/[\t\u000b\u00a0\u2028\u2029]+/g, ' ');

  // token pattern: word-character runs, runs of Greek letters / "~" / "_",
  // a few wiki markup tokens, newline runs, space runs, character entities,
  // and finally any single remaining character
  var pattern = /[\w]+|[\u03B1-\u03C9\u0391-\u03A9~_]+|\[\[|\]\]|\{\{|\}\}|\n+| +|&\w+;|'''|''|=+|\{\||\|\}|\|\-|./g;
  var result;

  // split old text into words
  // (exec() returning null resets lastIndex to 0, so the same regexp object
  // can be reused for the new text afterwards)
  while ( (result = pattern.exec(oldText)) != null ) {
    text.oldWords.push(result[0]);
  }

  // split new text into words
  while ( (result = pattern.exec(newText)) != null ) {
    text.newWords.push(result[0]);
  }

  return;
};


// WDiffText: calculate diff information
// input: text.newWords and text.oldWords, arrays containing the texts as arrays of words
//   optionally for recursive calls: newStart, newEnd, oldStart, oldEnd, recursionLevel
// changes: text.newToOld and text.oldToNew, arrays pointing to corresponding words
//
// Reads the global wDiffRecursiveDiff (defined outside this block) to decide
// whether unresolved regions are diffed recursively. Recursion stops once
// recursionLevel exceeds 10.

function WDiffText(text, newStart, newEnd, oldStart, oldEnd, recursionLevel) {

  // symbol[] holds one counter record per distinct word; symbols maps a word
  // to its index in symbol[]
  var symbol = [];
  var symbols = {};

  // set defaults
  if (typeof(newStart) == 'undefined') { newStart = 0; }
  if (typeof(newEnd) == 'undefined') { newEnd = text.newWords.length; }
  if (typeof(oldStart) == 'undefined') { oldStart = 0; }
  if (typeof(oldEnd) == 'undefined') { oldEnd = text.oldWords.length; }
  if (typeof(recursionLevel) == 'undefined') { recursionLevel = 0; }

  // limit recursion depth
  if (recursionLevel > 10) {
    return;
  }

  //
  // pass 1: Parse new text into symbol table
  //
  for (var i = newStart; i < newEnd; i ++) {
    var word = text.newWords[i];

    // preserve the native method: a word that is exactly "hasOwnProperty"
    // (plus optional underscores) gets one more underscore so it cannot
    // shadow symbols.hasOwnProperty
    if (word.indexOf('hasOwnProperty') == 0) {
      word = word.replace(/^(hasOwnProperty_*)$/, '$1_');
    }

    // add new entry to symbol table
    if (symbols.hasOwnProperty(word) == false) {
      var last = symbol.length;
      symbols[word] = last;
      symbol[last] = { newCtr: 1, oldCtr: 0, toNew: i, toOld: null };
    }

    // or update existing entry
    else {

      // increment word counter for new text
      var hashToArray = symbols[word];
      symbol[hashToArray].newCtr ++;
    }
  }

  //
  // pass 2: parse old text into symbol table
  //
  for (var i = oldStart; i < oldEnd; i ++) {
    var word = text.oldWords[i];

    // preserve the native method
    if (word.indexOf('hasOwnProperty') == 0) {
      word = word.replace(/^(hasOwnProperty_*)$/, '$1_');
    }

    // add new entry to symbol table
    if (symbols.hasOwnProperty(word) == false) {
      var last = symbol.length;
      symbols[word] = last;
      symbol[last] = { newCtr: 0, oldCtr: 1, toNew: null, toOld: i };
    }

    // or update existing entry
    else {

      // increment word counter for old text
      var hashToArray = symbols[word];
      symbol[hashToArray].oldCtr ++;

      // add word number for old text
      symbol[hashToArray].toOld = i;
    }
  }

  //
  // pass 3: connect unique words
  //
  for (var i = 0; i < symbol.length; i ++) {

    // find words in the symbol table that occur only once in both versions
    if ( (symbol[i].newCtr == 1) && (symbol[i].oldCtr == 1) ) {
      var toNew = symbol[i].toNew;
      var toOld = symbol[i].toOld;

      // do not use spaces as unique markers
      if (/^\s+$/.test(text.newWords[toNew]) == false) {

        // connect from new to old and from old to new
        text.newToOld[toNew] = toOld;
        text.oldToNew[toOld] = toNew;
      }
    }
  }

  //
  // pass 4: connect adjacent identical words downwards
  //
  for (var i = newStart; i < newEnd - 1; i ++) {

    // find already connected pairs
    if (text.newToOld[i] != null) {
      var j = text.newToOld[i];

      // check if the following words are not yet connected
      if ( (text.newToOld[i + 1] == null) && (text.oldToNew[j + 1] == null) ) {

        // connect if the following words are the same
        if (text.newWords[i + 1] == text.oldWords[j + 1]) {
          text.newToOld[i + 1] = j + 1;
          text.oldToNew[j + 1] = i + 1;
        }
      }
    }
  }

  //
  // pass 5: connect adjacent identical words upwards
  //
  for (var i = newEnd - 1; i > newStart; i --) {

    // find already connected pairs
    if (text.newToOld[i] != null) {
      var j = text.newToOld[i];

      // check if the preceding words are not yet connected
      if ( (text.newToOld[i - 1] == null) && (text.oldToNew[j - 1] == null) ) {

        // connect if the preceding words are the same
        if ( text.newWords[i - 1] == text.oldWords[j - 1] ) {
          text.newToOld[i - 1] = j - 1;
          text.oldToNew[j - 1] = i - 1;
        }
      }
    }
  }

  //
  // "pass" 6: recursively diff still unresolved regions downwards
  //
  if (wDiffRecursiveDiff == true) {
    var i = newStart;
    var j = oldStart;
    while (i < newEnd) {
      if (text.newToOld[i - 1] != null) {
        j = text.newToOld[i - 1] + 1;
      }

      // check for the start of an unresolved sequence
      if ( (text.newToOld[i] == null) && (text.oldToNew[j] == null) ) {

        // determine the ends of the sequences
        var iStart = i;
        var iEnd = i;
        while ( (text.newToOld[iEnd] == null) && (iEnd < newEnd) ) {
          iEnd ++;
        }
        var iLength = iEnd - iStart;

        var jStart = j;
        var jEnd = j;
        while ( (text.oldToNew[jEnd] == null) && (jEnd < oldEnd) ) {
          jEnd ++;
        }
        var jLength = jEnd - jStart;

        // recursively diff the unresolved sequence
        if ( (iLength > 0) && (jLength > 0) ) {
          if ( (iLength > 1) || (jLength > 1) ) {

            // skip the call when it would cover exactly the current range
            if ( (iStart != newStart) || (iEnd != newEnd) || (jStart != oldStart) || (jEnd != oldEnd) ) {
              WDiffText(text, iStart, iEnd, jStart, jEnd, recursionLevel + 1);
            }
          }
        }
        i = iEnd;
      }
      else {
        i ++;
      }
    }
  }

  //
  // "pass" 7: recursively diff still unresolved regions upwards
  //
  if (wDiffRecursiveDiff == true) {
    var i = newEnd - 1;
    var j = oldEnd - 1;
    while (i >= newStart) {
      if (text.newToOld[i + 1] != null) {
        j = text.newToOld[i + 1] - 1;
      }

      // check for the start of an unresolved sequence
      if ( (text.newToOld[i] == null) && (text.oldToNew[j] == null) ) {

        // determine the ends of the sequences
        var iStart = i;
        var iEnd = i + 1;
        while ( (text.newToOld[iStart - 1] == null) && (iStart >= newStart) ) {
          iStart --;
        }
        if (iStart < 0) {
          iStart = 0;
        }
        var iLength = iEnd - iStart;

        var jStart = j;
        var jEnd = j + 1;
        while ( (text.oldToNew[jStart - 1] == null) && (jStart >= oldStart) ) {
          jStart --;
        }
        if (jStart < 0) {
          jStart = 0;
        }
        var jLength = jEnd - jStart;

        // recursively diff the unresolved sequence
        if ( (iLength > 0) && (jLength > 0) ) {
          if ( (iLength > 1) || (jLength > 1) ) {
            if ( (iStart != newStart) || (iEnd != newEnd) || (jStart != oldStart) || (jEnd != oldEnd) ) {
              WDiffText(text, iStart, iEnd, jStart, jEnd, recursionLevel + 1);
            }
          }
        }
        i = iStart - 1;
      }
      else {
        i --;
      }
    }
  }
  return;
};


// WDiffToHtml: process diff data into formatted html text
// input: text.newWords and text.oldWords, arrays containing the texts in arrays of words
//   text.newToOld and text.oldToNew, arrays pointing to corresponding words
//   block data structure
// returns: outText, a html string
//
// Uses the wDiffHtml* markup globals, wDiffStyleBlock, wDiffBlockMinLength,
// wDiffWordDiff, WDiffHtmlCustomize and WDiffHtmlFormat, all defined outside
// this block.

function WDiffToHtml(text, block) {

  var outText = text.message;

  var blockNumber = 0;
  var i = 0;
  var j = 0;
  var movedAsInsertion;

  // cycle through the new text
  do {
    var movedIndex = [];
    var movedBlock = [];
    var movedLeft = [];
    var blockText = '';
    var identText = '';
    var delText = '';
    var insText = '';
    var identStart = '';

    // check if a block ends here and finish previous block
    if (movedAsInsertion != null) {
      if (movedAsInsertion == false) {
        identStart += wDiffHtmlBlockEnd;
      }
      else {
        identStart += wDiffHtmlInsertEnd;
      }
      movedAsInsertion = null;
    }

    // detect block boundary
    if ( (text.newToOld[i] != j) || (blockNumber == 0 ) ) {
      if ( ( (text.newToOld[i] != null) || (i >= text.newWords.length) ) && ( (text.oldToNew[j] != null) || (j >= text.oldWords.length) ) ) {

        // block moved right
        var moved = block.newRight[blockNumber];
        if (moved > 0) {
          var index = block.newRightIndex[blockNumber];
          movedIndex.push(index);
          movedBlock.push(moved);
          movedLeft.push(false);
        }

        // block moved left
        moved = block.newLeft[blockNumber];
        if (moved > 0) {
          var index = block.newLeftIndex[blockNumber];
          movedIndex.push(index);
          movedBlock.push(moved);
          movedLeft.push(true);
        }

        // check if a block starts here
        moved = block.newBlock[blockNumber];
        if (moved > 0) {

          // mark block as inserted text
          if (block.newWords[blockNumber] < wDiffBlockMinLength) {
            identStart += wDiffHtmlInsertStart;
            movedAsInsertion = true;
          }

          // mark block by color
          else {
            if (moved > wDiffStyleBlock.length) {
              moved = wDiffStyleBlock.length;
            }
            identStart += WDiffHtmlCustomize(wDiffHtmlBlockStart, moved - 1);
            movedAsInsertion = false;
          }
        }

        if (i >= text.newWords.length) {
          // past the end of the new text: step once more so the loop ends
          i ++;
        }
        else {
          j = text.newToOld[i];
          blockNumber ++;
        }
      }
    }

    // get the correct order if moved to the left as well as to the right from here
    if (movedIndex.length == 2) {
      if (movedIndex[0] > movedIndex[1]) {
        movedIndex.reverse();
        movedBlock.reverse();
        movedLeft.reverse();
      }
    }

    // handle left and right block moves from this position
    for (var m = 0; m < movedIndex.length; m ++) {

      // insert the block as deleted text
      if (block.newWords[ movedIndex[m] ] < wDiffBlockMinLength) {
        var movedStart = block.newStart[ movedIndex[m] ];
        var movedLength = block.newLength[ movedIndex[m] ];
        var str = '';
        for (var n = movedStart; n < movedStart + movedLength; n ++) {
          str += text.newWords[n];
        }
        str = WDiffEscape(str);
        str = str.replace(/\n/g, '<span class="wDiffParagraph"></span><br/>');
        blockText += wDiffHtmlDeleteStart + str + wDiffHtmlDeleteEnd;
      }

      // add a placeholder / move direction indicator
      else {
        if (movedBlock[m] > wDiffStyleBlock.length) {
          movedBlock[m] = wDiffStyleBlock.length;
        }
        if (movedLeft[m]) {
          blockText += WDiffHtmlCustomize(wDiffHtmlMovedLeft, movedBlock[m] - 1);
        }
        else {
          blockText += WDiffHtmlCustomize(wDiffHtmlMovedRight, movedBlock[m] - 1);
        }
      }
    }

    // collect consecutive identical text
    while ( (i < text.newWords.length) && (j < text.oldWords.length) ) {
      if ( (text.newToOld[i] == null) || (text.oldToNew[j] == null) ) {
        break;
      }
      if (text.newToOld[i] != j) {
        break;
      }
      identText += text.newWords[i];
      i ++;
      j ++;
    }

    // collect consecutive deletions
    while ( (text.oldToNew[j] == null) && (j < text.oldWords.length) ) {
      delText += text.oldWords[j];
      j ++;
    }

    // collect consecutive inserts
    while ( (text.newToOld[i] == null) && (i < text.newWords.length) ) {
      insText += text.newWords[i];
      i ++;
    }

    // remove leading and trailing similarities between delText and insText from highlighting
    var preText = '';
    var postText = '';
    if (wDiffWordDiff) {
      if ( (delText != '') && (insText != '') ) {

        // remove leading similarities
        while ( delText.charAt(0) == insText.charAt(0) && (delText != '') && (insText != '') ) {
          preText = preText + delText.charAt(0);
          delText = delText.substr(1);
          insText = insText.substr(1);
        }

        // remove trailing similarities
        while ( delText.charAt(delText.length - 1) == insText.charAt(insText.length - 1) && (delText != '') && (insText != '') ) {
          postText = delText.charAt(delText.length - 1) + postText;
          delText = delText.substr(0, delText.length - 1);
          insText = insText.substr(0, insText.length - 1);
        }
      }
    }

    // output the identical text, deletions and inserts

    // moved from here indicator
    if (blockText != '') {
      outText += blockText;
    }

    // identical text
    if (identText != '') {
      outText += identStart + WDiffEscape(identText);
    }

    // shared prefix of the changed words: this is raw source text, so it has
    // to be escaped like every other text run (it used to be appended as-is)
    outText += WDiffEscape(preText);

    // deleted text
    if (delText != '') {
      delText = wDiffHtmlDeleteStart + WDiffEscape(delText) + wDiffHtmlDeleteEnd;
      delText = delText.replace(/\n/g, '<span class="wDiffParagraph"></span><br/>');
      outText += delText;
    }

    // inserted text
    if (insText != '') {
      insText = wDiffHtmlInsertStart + WDiffEscape(insText) + wDiffHtmlInsertEnd;
      insText = insText.replace(/\n/g, '<span class="wDiffParagraph"></span><br/>');
      outText += insText;
    }

    // shared suffix of the changed words, escaped for the same reason as preText
    outText += WDiffEscape(postText);
  } while (i <= text.newWords.length);

  outText += '\n';
  outText = WDiffHtmlFormat(outText);

  return(outText);
};


// WDiffEscape: replaces html-sensitive characters in output text with character entities
// input: text, a plain string
// returns: the string with & < > " replaced by &amp; &lt; &gt; &quot;
//
// "&" must be handled first so the entities produced by the later
// replacements are not escaped a second time.

function WDiffEscape(text) {

  text = text.replace(/&/g, '&amp;');
  text = text.replace(/</g, '&lt;');
  text = text.replace(/>/g, '&gt;');
  text = text.replace(/"/g, '&quot;');

  return(text);
};


// HtmlCustomize: customize indicator html:
// replace {number} with the block number, {block} with the block style
// input: text, the html template; block, index into wDiffStyleBlock
// returns: the template with the first {number} and the first {block} replaced

function WDiffHtmlCustomize(text, block) {

  text = text.replace(/\{number\}/, block);
  text = text.replace(/\{block\}/, wDiffStyleBlock[block]);

  return(text);
};


// HtmlFormat: replaces newlines and multiple spaces in text with html code
// Each pair of blanks becomes " &nbsp;" so runs of spaces survive html
// whitespace collapsing (with a plain blank as replacement the spaces were
// collapsed instead); each newline becomes <br/>.

function WDiffHtmlFormat(text) {

  text = text.replace(/ {2}/g, ' &nbsp;');
  text = text.replace(/\n/g, '<br/>');

  return(text);
};


// WDiffDetectBlocks: detect block borders and moved blocks
// input: text object, block object
// changes: fills the block.* arrays; the move analysis only runs when the
//   global wDiffShowBlockMoves (defined outside this block) is set

function WDiffDetectBlocks(text, block) {

  block.oldStart = [];
  block.oldToNew = [];
  block.oldLength = [];
  block.oldWords = [];
  block.newStart = [];
  block.newLength = [];
  block.newWords = [];
  block.newNumber = [];
  block.newBlock = [];
  block.newLeft = [];
  block.newRight = [];
  block.newLeftIndex = [];
  block.newRightIndex = [];

  var blockNumber = 0;
  var wordCounter = 0;
  var realWordCounter = 0;

  // get old text block order
  if (wDiffShowBlockMoves) {
    var j = 0;
    var i = 0;
    do {

      // detect block boundaries on old text
      if ( (text.oldToNew[j] != i) || (blockNumber == 0 ) ) {
        if ( ( (text.oldToNew[j] != null) || (j >= text.oldWords.length) ) && ( (text.newToOld[i] != null) || (i >= text.newWords.length) ) ) {
          if (blockNumber > 0) {
            block.oldLength[blockNumber - 1] = wordCounter;
            block.oldWords[blockNumber - 1] = realWordCounter;
            wordCounter = 0;
            realWordCounter = 0;
          }

          if (j >= text.oldWords.length) {
            j ++;
          }
          else {
            i = text.oldToNew[j];
            block.oldStart[blockNumber] = j;
            block.oldToNew[blockNumber] = text.oldToNew[j];
            blockNumber ++;
          }
        }
      }

      // jump over identical pairs
      while ( (i < text.newWords.length) && (j < text.oldWords.length) ) {
        if ( (text.newToOld[i] == null) || (text.oldToNew[j] == null) ) {
          break;
        }
        if (text.oldToNew[j] != i) {
          break;
        }

        // count the matched word itself as a "real" word. The test used to
        // run after i++ and therefore looked at the following word (and at
        // the string "undefined" once i reached the end of the text).
        if ( /\w/.test( text.newWords[i] ) ) {
          realWordCounter ++;
        }
        i ++;
        j ++;
        wordCounter ++;
      }

      // jump over consecutive deletions
      while ( (text.oldToNew[j] == null) && (j < text.oldWords.length) ) {
        j ++;
      }

      // jump over consecutive inserts
      while ( (text.newToOld[i] == null) && (i < text.newWords.length) ) {
        i ++;
      }
    } while (j <= text.oldWords.length);

    // get the block order in the new text
    var lastMin = null;
    var currMinIndex;
    // declared locally; these two used to leak as implicit globals
    var currMin;
    var curr;

    // sort the data by increasing start numbers into new text block info
    for (var i = 0; i < blockNumber; i ++) {
      currMin = null;
      for (var j = 0; j < blockNumber; j ++) {
        curr = block.oldToNew[j];
        if ( (curr > lastMin) || (lastMin == null) ) {
          if ( (curr < currMin) || (currMin == null) ) {
            currMin = curr;
            currMinIndex = j;
          }
        }
      }
      block.newStart[i] = block.oldToNew[currMinIndex];
      block.newLength[i] = block.oldLength[currMinIndex];
      block.newWords[i] = block.oldWords[currMinIndex];
      block.newNumber[i] = currMinIndex;
      lastMin = currMin;
    }

    // detect not moved blocks
    for (var i = 0; i < blockNumber; i ++) {
      if (block.newBlock[i] == null) {
        if (block.newNumber[i] == i) {
          block.newBlock[i] = 0;
        }
      }
    }

    // detect switches of neighbouring blocks
    for (var i = 0; i < blockNumber - 1; i ++) {
      if ( (block.newBlock[i] == null) && (block.newBlock[i + 1] == null) ) {
        if (block.newNumber[i] - block.newNumber[i + 1] == 1) {
          if ( (block.newNumber[i + 1] - block.newNumber[i + 2] != 1) || (i + 2 >= blockNumber) ) {

            // the shorter one is declared the moved one
            if (block.newLength[i] < block.newLength[i + 1]) {
              block.newBlock[i] = 1;
              block.newBlock[i + 1] = 0;
            }
            else {
              block.newBlock[i] = 0;
              block.newBlock[i + 1] = 1;
            }
          }
        }
      }
    }

    // mark all others as moved and number the moved blocks
    j = 1;
    for (var i = 0; i < blockNumber; i ++) {
      if ( (block.newBlock[i] == null) || (block.newBlock[i] == 1) ) {
        block.newBlock[i] = j++;
      }
    }

    // check if a block has been moved from this block border
    for (var i = 0; i < blockNumber; i ++) {
      for (var j = 0; j < blockNumber; j ++) {

        if (block.newNumber[j] == i) {
          if (block.newBlock[j] > 0) {

            // block moved right
            if (block.newNumber[j] < j) {
              block.newRight[i] = block.newBlock[j];
              block.newRightIndex[i] = j;
            }

            // block moved left
            else {
              block.newLeft[i + 1] = block.newBlock[j];
              block.newLeftIndex[i + 1] = j;
            }
          }
        }
      }
    }
  }
  return;
};


// WDiffShortenOutput: remove unchanged parts from final output
// input: the output of WDiffString
// returns: the text with removed unchanged passages indicated by (...)
//   (wDiffNoChange when the text contains no diff tags)
//
// The context sizes come from wDiff*Before / wDiff*After / wDiffFragmentJoin
// globals; only some of them are defined within view of this block.

function WDiffShortenOutput(diffText) {

  // html <br/> to newlines
  diffText = diffText.replace(/<br[^>]*>/g, '\n');

  // scan for diff html tags
  var regExpDiff = /<\w+ class="(\w+)"[^>]*>(.|\n)*?<!--\1-->/g;
  var tagStart = [];
  var tagEnd = [];
  var i = 0;
  var found;
  while ( (found = regExpDiff.exec(diffText)) != null ) {

    // combine consecutive diff tags
    if ( (i > 0) && (tagEnd[i - 1] == found.index) ) {
      tagEnd[i - 1] = found.index + found[0].length;
    }
    else {
      tagStart[i] = found.index;
      tagEnd[i] = found.index + found[0].length;
      i ++;
    }
  }

  // no diff tags detected
  if (tagStart.length == 0) {
    return(wDiffNoChange);
  }

  // define regexps
  var regExpHeading = /\n=+.+?=+ *\n|\n\{\||\n\|\}/g;
  var regExpParagraph = /\n\n+/g;
  var regExpLine = /\n+/g;
  var regExpBlank = /(<[^>]+>)*\s+/g;

  // determine fragment border positions around diff tags
  var rangeStart = [];
  var rangeEnd = [];
  var rangeStartType = [];
  var rangeEndType = [];
  for (var i = 0; i < tagStart.length; i ++) {

    // find last heading before diff tag
    var lastPos = tagStart[i] - wDiffHeadingBefore;
    if (lastPos < 0) {
      lastPos = 0;
    }
    regExpHeading.lastIndex = lastPos;
    while ( (found = regExpHeading.exec(diffText)) != null ) {
      if (found.index > tagStart[i]) {
        break;
      }
      rangeStart[i] = found.index;
      rangeStartType[i] = 'heading';
    }

    // find last paragraph before diff tag
    if (rangeStart[i] == null) {
      lastPos = tagStart[i] - wDiffParagraphBefore;
      if (lastPos < 0) {
        lastPos = 0;
      }
      regExpParagraph.lastIndex = lastPos;
      while ( (found = regExpParagraph.exec(diffText)) != null ) {
        if (found.index > tagStart[i]) {
          break;
        }
        rangeStart[i] = found.index;
        rangeStartType[i] = 'paragraph';
      }
    }

    // find line break before diff tag
    if (rangeStart[i] == null) {
      lastPos = tagStart[i] - wDiffLineBeforeMax;
      if (lastPos < 0) {
        lastPos = 0;
      }
      regExpLine.lastIndex = lastPos;
      while ( (found = regExpLine.exec(diffText)) != null ) {
        if (found.index > tagStart[i] - wDiffLineBeforeMin) {
          break;
        }
        rangeStart[i] = found.index;
        rangeStartType[i] = 'line';
      }
    }

    // find blank before diff tag
    if (rangeStart[i] == null) {
      lastPos = tagStart[i] - wDiffBlankBeforeMax;
      if (lastPos < 0) {
        lastPos = 0;
      }
      regExpBlank.lastIndex = lastPos;
      while ( (found = regExpBlank.exec(diffText)) != null ) {
        if (found.index > tagStart[i] - wDiffBlankBeforeMin) {
          break;
        }
        rangeStart[i] = found.index;
        rangeStartType[i] = 'blank';
      }
    }

    // fixed number of chars before diff tag
    if (rangeStart[i] == null) {
      rangeStart[i] = tagStart[i] - wDiffCharsBefore;
      rangeStartType[i] = 'chars';
      if (rangeStart[i] < 0) {
        rangeStart[i] = 0;
      }
    }

    // find first heading after diff tag
    regExpHeading.lastIndex = tagEnd[i];
    if ( (found = regExpHeading.exec(diffText)) != null ) {
      if (found.index < tagEnd[i] + wDiffHeadingAfter) {
        rangeEnd[i] = found.index + found[0].length;
        rangeEndType[i] = 'heading';
      }
    }

    // find first paragraph after diff tag
    if (rangeEnd[i] == null) {
      regExpParagraph.lastIndex = tagEnd[i];
      if ( (found = regExpParagraph.exec(diffText)) != null ) {
        if (found.index < tagEnd[i] + wDiffParagraphAfter) {
          rangeEnd[i] = found.index;
          rangeEndType[i] = 'paragraph';
        }
      }
    }

    // find first line break after diff tag
    if (rangeEnd[i] == null) {
      regExpLine.lastIndex = tagEnd[i] + wDiffLineAfterMin;
      if ( (found = regExpLine.exec(diffText)) != null ) {
        if (found.index < tagEnd[i] + wDiffLineAfterMax) {
          rangeEnd[i] = found.index;
          rangeEndType[i] = 'break';
        }
      }
    }

    // find blank after diff tag
    if (rangeEnd[i] == null) {
      regExpBlank.lastIndex = tagEnd[i] + wDiffBlankAfterMin;
      if ( (found = regExpBlank.exec(diffText)) != null ) {
        if (found.index < tagEnd[i] + wDiffBlankAfterMax) {
          rangeEnd[i] = found.index;
          rangeEndType[i] = 'blank';
        }
      }
    }

    // fixed number of chars after diff tag
    // (the type is recorded unconditionally, mirroring the "before" case; it
    // used to be set only when the end had to be clamped to the text length)
    if (rangeEnd[i] == null) {
      rangeEnd[i] = tagEnd[i] + wDiffCharsAfter;
      rangeEndType[i] = 'chars';
      if (rangeEnd[i] > diffText.length) {
        rangeEnd[i] = diffText.length;
      }
    }
  }

  // remove overlaps, join close fragments
  var fragmentStart = [];
  var fragmentEnd = [];
  var fragmentStartType = [];
  var fragmentEndType = [];
  fragmentStart[0] = rangeStart[0];
  fragmentEnd[0] = rangeEnd[0];
  fragmentStartType[0] = rangeStartType[0];
  fragmentEndType[0] = rangeEndType[0];
  var j = 1;
  for (var i = 1; i < rangeStart.length; i ++) {
    if (rangeStart[i] > fragmentEnd[j - 1] + wDiffFragmentJoin) {
      fragmentStart[j] = rangeStart[i];
      fragmentEnd[j] = rangeEnd[i];
      fragmentStartType[j] = rangeStartType[i];
      fragmentEndType[j] = rangeEndType[i];
      j ++;
    }
    else {
      fragmentEnd[j - 1] = rangeEnd[i];
      fragmentEndType[j - 1] = rangeEndType[i];
    }
  }

  // assemble the fragments
  var outText = '';
  for (var i = 0; i < fragmentStart.length; i ++) {

    // get text fragment
    var fragment = diffText.substring(fragmentStart[i], fragmentEnd[i]);
    fragment = fragment.replace(/^\n+|\n+$/g, '');

    // add inline marks for omitted chars and words
    if (fragmentStart[i] > 0) {
      if (fragmentStartType[i] == 'chars') {
        fragment = wDiffOmittedChars + fragment;
      }
      else if (fragmentStartType[i] == 'blank') {
        fragment = wDiffOmittedChars + ' ' + fragment;
      }
    }

    // the trailing mark depends on how the END of the fragment was found
    // (this used to test fragmentStartType by mistake)
    if (fragmentEnd[i] < diffText.length) {
      if (fragmentEndType[i] == 'chars') {
        fragment = fragment + wDiffOmittedChars;
      }
      else if (fragmentEndType[i] == 'blank') {
        fragment = fragment + ' ' + wDiffOmittedChars;
      }
    }

    // add omitted line separator
    if (fragmentStart[i] > 0) {
      outText += wDiffOmittedLines;
    }

    // encapsulate span errors
    outText += '<div>' + fragment + '</div>';
  }

  // add trailing omitted line separator
  if (fragmentEnd[fragmentStart.length - 1] < diffText.length) {
    outText = outText + wDiffOmittedLines;
  }

  // remove leading and trailing empty lines
  outText = outText.replace(/^(<div>)\n+|\n+(<\/div>)$/g, '$1$2');

  // convert to html linebreaks
  outText = outText.replace(/\n/g, '<br />');

  return(outText);
};


// On page load: read the two hidden buffers, install placeholder file objects
// and start a comparison.
$(document).ready(function () {
  // NOTE(review): lhs and rhs are decoded here but not used again inside this
  // handler; compare() is defined elsewhere in the file - confirm how it is
  // meant to receive the buffer contents.
  var lhs = decodeURIComponent($('#lhsdata').val());
  var rhs = decodeURIComponent($('#rhsdata').val());
  file1 = {};
  file2 = {};
  file1.name = '[buffer 1]';
  file2.name = '[buffer 2]';
  compare();
});
</script>
<link rel="stylesheet" type="text/css" href="reconciler.css" />
</head>
<body class="active">
  <h1>ITSEE Transcription Reconciler</h1>
  <div id="header_container">
    <div class="column">
      <h3>Create New Reconciliation</h3>
      <label for="file">Select File: </label><input type="file" id="file" name="file" onchange="javascript:uploadFile()"/>
      <div id="list">
        <ul>
          <li><span style="background: #990033; color: white;">File 1: </span><span class="file_details" id="file1_details"></span></li>
          <li><span style="background: #009933; color: white;">File 2: </span><span class="file_details" id="file2_details"></span></li>
        </ul>
      </div>
      <span><b>Options</b></span>
      <br/>
      <label for="layout">Ignore layout information</label><input type="checkbox" id="layout" name="layout" checked="checked"/>
      <label for="tags">Ignore all tags [ ]</label><input type="checkbox" id="tags" name="tags"/>
      <br/>
      <label for="final_nu">Treat final nu mark as nu</label><input type="checkbox" id="final_nu" name="final_nu" checked="checked"/>
      <label for="punctuation">Ignore punctuation</label><input type="checkbox" id="punctuation" name="punctuation" checked="checked"/>
      <br/>
      <label for="all_comments">Ignore all comments</label><input type="checkbox" id="all_comments" name="all_comments"/>
      <label for="status_note">Ignore Status Note</label><input type="checkbox" id="status_note" name="status_note" checked="checked"/>
      <br/>
      <span>Ignore comments containing only:</span>
      <label for="comment_om">OM</label><input type="checkbox" id="comment_om" name="comment_om" checked="checked"/>
      <label for="comment_comm">comm</label><input type="checkbox" id="comment_comm" name="comment_comm" checked="checked"/>
      <label for="comment_lect">lect</label><input type="checkbox" id="comment_lect" name="comment_lect" checked="checked"/>
      <br/>
      <br/>
      <input type="button" onclick="javascript:compare()" value="Compare"/>
    </div>
    <div class="column">
      <h3>Save as Text</h3>
      <input type="button" onclick="javascript:saveText()" value="SaveText"/>
      <br/>
      <h3>Save Current State</h3>
      <input type="button" onclick="javascript:saveReconciliation()" value="Save"/>
      <br/>
      <h3>Load Saved Reconciliation</h3>
      <select id="savedRecons">
        <option value="">Select</option>
      </select>
      <button onclick="loadRecon();return false;">Load</button>
    </div>
    <div class="column">
      <h3>Export Current State</h3>
      <button onclick="exportRecon();return false;">Export</button>
      <h3>Import Saved Reconciliation</h3>
      <label for="reconciliation">Upload: </label><input type="file" id="reconciliation" name="reconciliation" onchange="javascript:setReconName(uploadReconciliation())"/>
    </div>
  </div>
  <div id="results_container">
    <h2>Results</h2>
    <div id="results_summary"></div>
    <a id="structure_results_toggle" href="javascript:toggle_results('structure_results', 'structure_results_toggle')"></a>
    <a id="text_results_toggle" href="javascript:toggle_results('text_results', 'text_results_toggle')"></a>
    <div id="structure_results" class="results" style="display:none;"></div>
    <div id="text_results" class="results" style="display:none;"></div>
  </div>
  <%-- Hidden buffers read by the ready handler above. Whitespace inside a
       textarea is part of its value, so both stay on a single line. --%>
  <textarea id="lhsdata" style="display:none;"> <%=HTTPUtils.canonize(lhs)%> </textarea>
  <textarea id="rhsdata" style="display:none;"> <%=HTTPUtils.canonize(rhs)%> </textarea>
</body>
</html>