mirror of
https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor
synced 2024-11-25 06:46:26 +00:00
a8fa9433c4
token stream. This is the first token transformation exercising the TokenTransformer class as its dispatcher. Template expansions, wiki link formatting, tag sanitation and extensions should be able to use the same dispatcher by registering for specific token types. The parser performance is very slightly improved as the token stream is only traversed once.
629 lines
18 KiB
JavaScript
629 lines
18 KiB
JavaScript
/**
|
|
* Initial parser tests runner for experimental JS parser
|
|
*
|
|
* This pulls all the parserTests.txt items and runs them through the JS
|
|
* parser and JS HTML renderer. Currently no comparison is done on output,
|
|
* as a direct string comparison won't be very encouraging. :)
|
|
*
|
|
* Needs smarter compare, as well as search-y helpers.
|
|
*
|
|
* 2011-07-20 <brion@pobox.com>
|
|
*/
|
|
|
|
(function() {
|
|
//"use strict";
|
|
console.log( "Starting up JS parser tests" );
|
|
|
|
var fs = require('fs'),
|
|
path = require('path'),
|
|
jsDiff = require('diff'),
|
|
colors = require('colors'),
|
|
util = require( 'util' ),
|
|
HTML5 = require('html5').HTML5,
|
|
PEG = require('pegjs'),
|
|
// Handle options/arguments with optimist module
|
|
optimist = require('optimist');
|
|
|
|
// @fixme wrap more or this setup in a common module
|
|
|
|
// track files imported / required
|
|
var fileDependencies = [];
|
|
|
|
// Fetch up some of our wacky parser bits...
|
|
|
|
var basePath = path.join(path.dirname(path.dirname(process.cwd())), 'modules');
|
|
function _require(filename) {
|
|
var fullpath = path.join( basePath, filename );
|
|
fileDependencies.push( fullpath );
|
|
return require( fullpath );
|
|
}
|
|
|
|
function _import(filename, symbols) {
|
|
var module = _require(filename);
|
|
symbols.forEach(function(symbol) {
|
|
global[symbol] = module[symbol];
|
|
});
|
|
}
|
|
|
|
|
|
// For now most modules only need this for $.extend and $.each :)
|
|
global.$ = require('jquery');
|
|
|
|
var pj = path.join;
|
|
|
|
// Our code...
|
|
|
|
var testWhiteList = require('./parserTests-whitelist.js').testWhiteList;
|
|
|
|
_import(pj('parser', 'mediawiki.tokenizer.peg.js'), ['PegTokenizer']);
|
|
_import(pj('parser', 'mediawiki.parser.environment.js'), ['MWParserEnvironment']);
|
|
_import(pj('parser', 'mediawiki.TokenTransformer.js'), ['TokenTransformer']);
|
|
_import(pj('parser', 'ext.cite.taghook.ref.js'), ['MWRefTagHook']);
|
|
|
|
_import(pj('parser', 'mediawiki.HTML5TreeBuilder.node.js'), ['FauxHTML5']);
|
|
_import(pj('parser', 'mediawiki.DOMPostProcessor.js'), ['DOMPostProcessor']);
|
|
|
|
_import(pj('parser', 'ext.core.QuoteTransformer.js'), ['QuoteTransformer']);
|
|
|
|
// WikiDom and serializers
|
|
_require(pj('es', 'es.js'));
|
|
_require(pj('es', 'es.Html.js'));
|
|
_require(pj('es', 'serializers', 'es.AnnotationSerializer.js'));
|
|
_require(pj('es', 'serializers', 'es.HtmlSerializer.js'));
|
|
_require(pj('es', 'serializers', 'es.WikitextSerializer.js'));
|
|
_require(pj('es', 'serializers', 'es.JsonSerializer.js'));
|
|
|
|
|
|
function ParserTests () {
|
|
|
|
this.argv = optimist.usage( 'Usage: $0', {
|
|
'quick': {
|
|
description: 'Suppress diff output of failed tests',
|
|
boolean: true,
|
|
default: false
|
|
},
|
|
'quiet': {
|
|
description: 'Suppress notification of passed tests (shows only failed tests)',
|
|
boolean: true,
|
|
default: false
|
|
},
|
|
'color': {
|
|
description: 'Enable color output Ex: --no-color',
|
|
boolean: true,
|
|
default: true
|
|
},
|
|
'cache': {
|
|
description: 'Get tests cases from cache file ' + this.cache_file,
|
|
boolean: true,
|
|
default: false
|
|
},
|
|
'filter': {
|
|
description: 'Only run tests whose descriptions which match given regex',
|
|
alias: 'regex'
|
|
},
|
|
'whitelist': {
|
|
description: 'Alternatively compare against manually verified parser output from whitelist',
|
|
default: true,
|
|
boolean: true
|
|
},
|
|
'help': {
|
|
description: 'Show this help message',
|
|
alias: 'h'
|
|
},
|
|
'disabled': {
|
|
description: 'Run disabled tests (option not implemented)',
|
|
default: false,
|
|
boolean: true
|
|
},
|
|
'printwhitelist': {
|
|
description: 'Print out a whitelist entry for failing tests. Default false.',
|
|
default: false,
|
|
boolean: true
|
|
}
|
|
}
|
|
).check( function(argv) {
|
|
if( argv.filter === true ) {
|
|
throw "--filter need an argument";
|
|
}
|
|
}
|
|
).argv; // keep that
|
|
|
|
|
|
if( this.argv.help ) {
|
|
optimist.showHelp();
|
|
process.exit( 0 );
|
|
}
|
|
this.test_filter = null;
|
|
if( this.argv.filter ) { // null is the default by definition
|
|
try {
|
|
this.test_filter = new RegExp( this.argv.filter );
|
|
} catch(e) {
|
|
console.error( "\nERROR> --filter was given an invalid regular expression.");
|
|
console.error( "ERROR> See below for JS engine error:\n" + e + "\n" );
|
|
process.exit( 1 );
|
|
}
|
|
console.log( "Filtering title test using Regexp " + this.test_filter );
|
|
}
|
|
if( !this.argv.color ) {
|
|
colors.mode = 'none';
|
|
}
|
|
|
|
// Name of file used to cache the parser tests cases
|
|
this.cache_file = "parserTests.cache";
|
|
|
|
// Preload the grammar file...
|
|
PegTokenizer.src = fs.readFileSync(path.join(basePath, 'parser', 'pegTokenizer.pegjs.txt'), 'utf8');
|
|
|
|
this.wikiTokenizer = new PegTokenizer();
|
|
|
|
this.testFileName = '../../../../phase3/tests/parser/parserTests.txt'; // default
|
|
this.testFileName2 = '../../../../tests/parser/parserTests.txt'; // Fallback. Not everyone fetch at phase3 level
|
|
|
|
if (this.argv._[0]) {
|
|
// hack :D
|
|
this.testFileName = this.argv._[0] ;
|
|
this.testFileName2 = null;
|
|
}
|
|
|
|
try {
|
|
this.testParser = PEG.buildParser(fs.readFileSync('parserTests.pegjs', 'utf8'));
|
|
} catch (e) {
|
|
console.log(e);
|
|
}
|
|
|
|
this.cases = this.getTests();
|
|
|
|
this.articles = {};
|
|
|
|
this.htmlparser = new HTML5.Parser();
|
|
|
|
this.postProcessor = new DOMPostProcessor();
|
|
|
|
var pt = this;
|
|
|
|
// Set up the TokenTransformer with a callback for the remaining
|
|
// processing.
|
|
this.tokenTransformer = new TokenTransformer ( function ( tokens ) {
|
|
|
|
//console.log("TOKENS: " + JSON.stringify(tokens, null, 2));
|
|
|
|
// Create a new tree builder, which also creates a new document.
|
|
var treeBuilder = new FauxHTML5.TreeBuilder();
|
|
|
|
// Build a DOM tree from tokens using the HTML tree builder/parser.
|
|
pt.buildTree( tokens, treeBuilder );
|
|
|
|
// Perform post-processing on DOM.
|
|
pt.postProcessor.doPostProcess(treeBuilder.parser.document);
|
|
|
|
// And serialize the result.
|
|
var out = treeBuilder.body().innerHTML;
|
|
|
|
// Finally, check the result vs. the expected result.
|
|
pt.checkResult( pt.currentItem, out );
|
|
});
|
|
|
|
// Add token transformations..
|
|
var qt = new QuoteTransformer();
|
|
qt.register(this.tokenTransformer);
|
|
|
|
// Test statistics
|
|
this.passedTests = 0;
|
|
this.passedTestsManual = 0;
|
|
this.failParseTests = 0;
|
|
this.failTreeTests = 0;
|
|
this.failOutputTests = 0;
|
|
|
|
this.currentItem = undefined;
|
|
|
|
return this;
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
* Get an object holding our tests cases. Eventually from a cache file
|
|
*/
|
|
ParserTests.prototype.getTests = function () {
|
|
|
|
// Startup by loading .txt test file
|
|
var testFile;
|
|
try {
|
|
testFile = fs.readFileSync(this.testFileName, 'utf8');
|
|
fileDependencies.push( this.testFileName );
|
|
} catch (e) {
|
|
// Try opening fallback file
|
|
if( this.testFileName2 !== '' ) {
|
|
try {
|
|
testFile = fs.readFileSync( this.testFileName2, 'utf8' );
|
|
fileDependencies.push( this.testFileName2 );
|
|
}
|
|
catch(e) { console.log(e); }
|
|
}
|
|
}
|
|
if( !this.argv.cache ) {
|
|
// Cache not wanted, parse file and return object
|
|
return this.parseTestCase( testFile );
|
|
}
|
|
|
|
// Find out modification time of all files depencies and then hashes those
|
|
// as a unique value using sha1.
|
|
var mtimes = '';
|
|
fileDependencies.sort().forEach( function (file) {
|
|
mtimes += fs.statSync( file ).mtime;
|
|
});
|
|
var sha1 = require('crypto').createHash('sha1')
|
|
.update( mtimes ).digest( 'hex' );
|
|
|
|
// Look for a cache_file
|
|
var cache_content;
|
|
var cache_file_digest;
|
|
try {
|
|
console.log( "Looking for cache file " + this.cache_file );
|
|
cache_content = fs.readFileSync( this.cache_file, 'utf8' );
|
|
// Fetch previous digest
|
|
cache_file_digest = cache_content.match( /^CACHE: (\w+)\n/ )[1];
|
|
} catch(e) {
|
|
// cache file does not exist
|
|
}
|
|
|
|
if( cache_file_digest === sha1 ) {
|
|
// cache file match our digest.
|
|
console.log( "Loaded tests cases from cache file" );
|
|
// Return contained object after removing first line (CACHE: <sha1>)
|
|
return JSON.parse( cache_content.replace( /.*\n/, '' ) );
|
|
} else {
|
|
// Write new file cache, content preprended with current digest
|
|
console.log( "Cache file either inexistant or outdated" );
|
|
var parse = this.parseTestCase( testFile );
|
|
console.log( "Writing parse result to " + this.cache_file );
|
|
fs.writeFileSync( this.cache_file,
|
|
"CACHE: " + sha1 + "\n" + JSON.stringify( parse ),
|
|
'utf8'
|
|
);
|
|
|
|
// We can now return the parsed object
|
|
return parse;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* Parse given tests cases given as plaintext
|
|
*/
|
|
ParserTests.prototype.parseTestCase = function ( content ) {
|
|
console.log( "Parsing tests case from file, this takes a few seconds ..." );
|
|
try {
|
|
console.log( "Done parsing." );
|
|
return this.testParser.parse(content);
|
|
} catch (e) {
|
|
console.log(e);
|
|
}
|
|
};
|
|
|
|
|
|
ParserTests.prototype.normalizeTitle = function(name) {
|
|
if (typeof name !== 'string') {
|
|
throw new Error('nooooooooo not a string');
|
|
}
|
|
name = name.replace(/[\s_]+/g, '_');
|
|
name = name.substr(0, 1).toUpperCase() + name.substr(1);
|
|
if (name === '') {
|
|
throw new Error('Invalid/empty title');
|
|
}
|
|
return name;
|
|
};
|
|
|
|
ParserTests.prototype.fetchArticle = function(name) {
|
|
// very simple for now..
|
|
var norm = normalizeTitle(name);
|
|
if (norm in this.articles) {
|
|
return this.articles[norm];
|
|
}
|
|
};
|
|
|
|
ParserTests.prototype.processArticle = function(item) {
|
|
var norm = this.normalizeTitle(item.title);
|
|
this.articles[norm] = item.text;
|
|
};
|
|
|
|
|
|
/* Normalize the expected parser output by parsing it using a HTML5 parser and
|
|
* re-serializing it to HTML. Ideally, the parser would normalize inter-tag
|
|
* whitespace for us. For now, we fake that by simply stripping all newlines.
|
|
*/
|
|
ParserTests.prototype.normalizeHTML = function (source) {
|
|
// TODO: Do not strip newlines in pre and nowiki blocks!
|
|
source = source.replace(/\n/g, '');
|
|
try {
|
|
this.htmlparser.parse('<body>' + source + '</body>');
|
|
return this.htmlparser.document
|
|
.getElementsByTagName('body')[0]
|
|
.innerHTML
|
|
// a few things we ignore for now..
|
|
.replace(/\/wiki\/Main_Page/g, 'Main Page')
|
|
// do not expect a toc for now
|
|
.replace(/<table[^>]+?id="toc"[^>]*>.+?<\/table>/mg, '')
|
|
// do not expect section editing for now
|
|
.replace(/(<span class="editsection">\[.*?<\/span> *)?<span[^>]+class="mw-headline"[^>]*>(.*?)<\/span>/g, '$2')
|
|
// general class and titles, typically on links
|
|
.replace(/(title|class|rel)="[^"]+"/g, '')
|
|
// strip red link markup, we do not check if a page exists yet
|
|
.replace(/\/index.php\?title=|&action=edit&redlink=1/g, '')
|
|
// the expected html has some extra space in tags, strip it
|
|
.replace(/<a +href/g, '<a href')
|
|
.replace(/" +>/g, '">');
|
|
} catch(e) {
|
|
console.log("normalizeHTML failed on" +
|
|
source + " with the following error: " + e);
|
|
console.trace();
|
|
return source;
|
|
}
|
|
|
|
};
|
|
|
|
// Specialized normalization of the wiki parser output, mostly to ignore a few
|
|
// known-ok differences.
|
|
ParserTests.prototype.normalizeOut = function ( out ) {
|
|
// TODO: Do not strip newlines in pre and nowiki blocks!
|
|
return out.replace(/\n| data-[a-zA-Z]+="[^">]*"/g, '')
|
|
.replace(/<!--.*?-->\n?/gm, '');
|
|
};
|
|
|
|
ParserTests.prototype.formatHTML = function ( source ) {
|
|
// Quick hack to insert newlines before some block level start tags
|
|
return source.replace(
|
|
/(?!^)<((div|dd|dt|li|p|table|tr|td|tbody|dl|ol|ul|h1|h2|h3|h4|h5|h6)[^>]*)>/g,
|
|
'\n<$1>');
|
|
};
|
|
|
|
|
|
|
|
ParserTests.prototype.printTitle = function( item, failure_only ) {
|
|
if( failure_only ) {
|
|
console.log('FAILED'.red + ': ' + item.title.yellow);
|
|
return;
|
|
}
|
|
console.log('=====================================================');
|
|
console.log('FAILED'.red + ': ' + item.title.yellow);
|
|
console.log(item.comments.join('\n'));
|
|
if (item.options) {
|
|
console.log("OPTIONS".cyan + ":");
|
|
console.log(item.options + '\n');
|
|
}
|
|
console.log("INPUT".cyan + ":");
|
|
console.log(item.input + "\n");
|
|
};
|
|
|
|
|
|
|
|
ParserTests.prototype.processTest = function (item) {
|
|
if (!('title' in item)) {
|
|
console.log(item);
|
|
throw new Error('Missing title from test case.');
|
|
}
|
|
if (!('input' in item)) {
|
|
console.log(item);
|
|
throw new Error('Missing input from test case ' + item.title);
|
|
}
|
|
if (!('result' in item)) {
|
|
console.log(item);
|
|
throw new Error('Missing input from test case ' + item.title);
|
|
}
|
|
|
|
this.currentItem = item;
|
|
|
|
// Tokenize the input
|
|
var res = this.wikiTokenizer.tokenize(item.input + "\n");
|
|
|
|
// Check for errors
|
|
if (res.err) {
|
|
this.printTitle(item);
|
|
this.failParseTests++;
|
|
console.log('PARSE FAIL', res.err);
|
|
} else {
|
|
//var environment = new MWParserEnvironment({
|
|
// tagHooks: {
|
|
// 'ref': MWRefTagHook,
|
|
// 'references': MWReferencesTagHook
|
|
// }
|
|
//});
|
|
//var res = es.HtmlSerializer.stringify(tokens,environment);
|
|
|
|
//console.log(JSON.stringify(tokens));
|
|
//Slightly better token output debugging:
|
|
//console.log( util.inspect( tokens, false, null ).yellow);
|
|
|
|
// Transform tokens using the TokenTransformer. When done, the
|
|
// TokenTransformer calls buildTree() and checkResult() with the
|
|
// transformed tokens.
|
|
this.tokenTransformer.transformTokens( res.tokens );
|
|
}
|
|
};
|
|
|
|
ParserTests.prototype.checkResult = function ( item, out ) {
|
|
var normalizedOut = this.normalizeOut(out);
|
|
var normalizedExpected = this.normalizeHTML(item.result);
|
|
if ( normalizedOut !== normalizedExpected ) {
|
|
if (this.argv.whitelist &&
|
|
item.title in testWhiteList &&
|
|
this.normalizeOut(testWhiteList[item.title]) === normalizedOut) {
|
|
if( !this.argv.quiet ) {
|
|
console.log( 'PASSED (whiteList)'.green + ': ' + item.title.yellow );
|
|
}
|
|
this.passedTestsManual++;
|
|
return;
|
|
}
|
|
this.printTitle( item, this.argv.quick );
|
|
this.failOutputTests++;
|
|
|
|
if( !this.argv.quick ) {
|
|
console.log('RAW EXPECTED'.cyan + ':');
|
|
console.log(item.result + "\n");
|
|
|
|
console.log('RAW RENDERED'.cyan + ':');
|
|
console.log(this.formatHTML(out) + "\n");
|
|
|
|
var a = this.formatHTML(normalizedExpected);
|
|
|
|
console.log('NORMALIZED EXPECTED'.magenta + ':');
|
|
console.log(a + "\n");
|
|
|
|
var b = this.formatHTML(normalizedOut);
|
|
|
|
console.log('NORMALIZED RENDERED'.magenta + ':');
|
|
console.log(this.formatHTML(this.normalizeOut(out)) + "\n");
|
|
var patch = jsDiff.createPatch('wikitext.txt', a, b, 'before', 'after');
|
|
|
|
console.log('DIFF'.cyan +': ');
|
|
|
|
// Strip the header from the patch, we know how diffs work..
|
|
patch = patch.replace(/^[^\n]*\n[^\n]*\n[^\n]*\n[^\n]*\n/, '');
|
|
|
|
var colored_diff = patch.split( '\n' ).map( function(line) {
|
|
// Add some colors to diff output
|
|
switch( line.charAt(0) ) {
|
|
case '-':
|
|
return line.red;
|
|
case '+':
|
|
return line.blue;
|
|
default:
|
|
return line;
|
|
}
|
|
}).join( "\n" );
|
|
|
|
|
|
console.log( colored_diff );
|
|
|
|
if(this.argv.printwhitelist) {
|
|
console.log("WHITELIST ENTRY:".cyan);
|
|
console.log("testWhiteList[" +
|
|
JSON.stringify(item.title) + "] = " +
|
|
JSON.stringify(out) +
|
|
";\n");
|
|
}
|
|
}
|
|
} else {
|
|
this.passedTests++;
|
|
if( !this.argv.quiet ) {
|
|
console.log( 'PASSED'.green + ': ' + item.title.yellow );
|
|
}
|
|
}
|
|
};
|
|
|
|
|
|
ParserTests.prototype.buildTree = function ( tokens, treeBuilder ) {
|
|
// push a body element, just to be sure to have one
|
|
treeBuilder.processToken({type: 'TAG', name: 'body'});
|
|
// Process all tokens
|
|
for (var i = 0, length = tokens.length; i < length; i++) {
|
|
treeBuilder.processToken(tokens[i]);
|
|
}
|
|
// And signal the end
|
|
treeBuilder.processToken({type: 'END'});
|
|
};
|
|
|
|
/**
|
|
* Colorize given number if <> 0
|
|
*
|
|
* @param count Integer: a number to colorize
|
|
* @param color String: 'green' or 'red'
|
|
*/
|
|
ParserTests.prototype.ColorizeCount = function ( count, color ) {
|
|
if( count === 0 ) {
|
|
return count;
|
|
}
|
|
|
|
// We need a string to use colors methods
|
|
count = count.toString();
|
|
// FIXME there must be a wait to call a method by its name
|
|
switch( color ) {
|
|
case 'green': return count.green;
|
|
case 'red': return count.red;
|
|
|
|
default: return count;
|
|
}
|
|
};
|
|
|
|
ParserTests.prototype.reportSummary = function () {
|
|
|
|
var failTotalTests = (this.failParseTests + this.failTreeTests +
|
|
this.failOutputTests);
|
|
|
|
console.log( "==========================================================");
|
|
console.log( "SUMMARY: ");
|
|
|
|
if( failTotalTests !== 0 ) {
|
|
console.log( this.ColorizeCount( this.passedTests , 'green' ) +
|
|
" passed");
|
|
console.log( this.ColorizeCount( this.passedTestsManual , 'green' ) +
|
|
" passed from whitelist");
|
|
console.log( this.ColorizeCount( this.failParseTests , 'red' ) +
|
|
" parse failures");
|
|
console.log( this.ColorizeCount( this.failTreeTests , 'red' ) +
|
|
" tree build failures");
|
|
console.log( this.ColorizeCount( this.failOutputTests, 'red' ) +
|
|
" output differences");
|
|
console.log( "\n" );
|
|
console.log( this.ColorizeCount( this.passedTests + this.passedTestsManual , 'green' ) +
|
|
' total passed tests, ' +
|
|
this.ColorizeCount( failTotalTests , 'red' ) + " total failures");
|
|
|
|
} else {
|
|
if( this.test_filter !== null ) {
|
|
console.log( "Passed " + this.passedTests + pthis.assedTestsManual +
|
|
" of " + passedTests + " tests matching " + this.test_filter +
|
|
"... " + "ALL TESTS PASSED!".green );
|
|
} else {
|
|
// Should not happen if it does: Champagne!
|
|
console.log( "Passed " + this.passedTests + " of " + this.passedTests +
|
|
" tests... " + "ALL TESTS PASSED!".green );
|
|
}
|
|
}
|
|
console.log( "==========================================================");
|
|
|
|
};
|
|
|
|
ParserTests.prototype.main = function () {
|
|
console.log( "Initialisation complete. Now launching tests." );
|
|
|
|
var comments = [],
|
|
pt = this;
|
|
this.cases.forEach(function(item) {
|
|
if (typeof item == 'object') {
|
|
switch(item.type) {
|
|
case 'article':
|
|
pt.processArticle(item);
|
|
comments = [];
|
|
break;
|
|
case 'test':
|
|
if( pt.test_filter &&
|
|
-1 === item.title.search( pt.test_filter ) ) {
|
|
// Skip test whose title does not match --filter
|
|
break;
|
|
}
|
|
// Add comments to following test.
|
|
item.comments = comments;
|
|
comments = [];
|
|
pt.processTest(item);
|
|
break;
|
|
case 'comment':
|
|
comments.push(item.comment);
|
|
break;
|
|
default:
|
|
comments = [];
|
|
break;
|
|
}
|
|
}
|
|
});
|
|
|
|
// print out the summary
|
|
this.reportSummary();
|
|
};
|
|
|
|
// Construct the ParserTests object and run the parser tests
|
|
new ParserTests().main();
|
|
|
|
|
|
})();
|