Mirror of https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor (synced 2024-11-15 18:39:52 +00:00)
refactor parser to ParseThingy in different module, can be invoked with command line utility parse.js
parent aedc6751ae
commit 4158f82d7e
Gabriel Wicke, 2012-02-27 16:40:01 +00:00
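Usage note (an assumption, not part of the commit): with Node.js and the repository's npm dependencies installed (likely including jquery and optimist, plus whatever the parser modules pull in transitively, such as html5 and pegjs), the new utility should be invocable along the lines of

	node modules/parser/parse.js < input.wikitext > output.json

reading wikitext on STDIN and writing serialized WikiDom JSON to STDOUT, as the doc comment in parse.js below states.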
@@ -22,6 +22,8 @@
 * integrate the general environment (configuration, cache etc). (gwicke)
 * */

$ = require('jquery');

/**
 * Central dispatcher for potentially asynchronous token transformations.
 *
modules/parser/mediawiki.parser.js (new file, 106 lines)
@@ -0,0 +1,106 @@
/**
 *
 * Simple parser class. Should have lots of options for observing parse stages (or, use events).
 *
 * @author Gabriel Wicke <gwicke@wikimedia.org>
 * @author Neil Kandalgaonkar <neilk@wikimedia.org>
 */

var fs = require('fs'),
	path = require('path'),
	$ = require('jquery'),
	PegTokenizer = require('./mediawiki.tokenizer.peg.js').PegTokenizer,
	TokenTransformDispatcher = require('./mediawiki.TokenTransformDispatcher.js').TokenTransformDispatcher,
	DOMPostProcessor = require('./mediawiki.DOMPostProcessor.js').DOMPostProcessor,
	DOMConverter = require('./mediawiki.DOMConverter.js').DOMConverter,
	QuoteTransformer = require('./ext.core.QuoteTransformer.js').QuoteTransformer,
	Cite = require('./ext.Cite.js').Cite,
	MWRefTagHook = require('./ext.cite.taghook.ref.js').MWRefTagHook,
	FauxHTML5 = require('./mediawiki.HTML5TreeBuilder.node.js').FauxHTML5;

function ParseThingy( config ) {

	if ( !config ) {
		config = {};
	}

	if ( !config.peg ) {
		// n.b. __dirname is relative to the module.
		var pegSrcPath = path.join( __dirname, 'pegTokenizer.pegjs.txt' );
		config.peg = fs.readFileSync( pegSrcPath, 'utf8' );
	}

	// XXX parser environment? Will be needed for fetching templates, etc.

	this.wikiTokenizer = new PegTokenizer( config.parserEnv, config.peg );

	this.postProcessor = new DOMPostProcessor();

	this.DOMConverter = new DOMConverter();

	var pthingy = this;

	// Set up the TokenTransformDispatcher with a callback for the remaining
	// processing.
	this.tokenDispatcher = new TokenTransformDispatcher( function ( tokens ) {

		//console.log("TOKENS: " + JSON.stringify(tokens, null, 2));

		// Create a new tree builder, which also creates a new document.
		var treeBuilder = new FauxHTML5.TreeBuilder();

		// Build a DOM tree from tokens using the HTML tree builder/parser.
		pthingy.buildTree( tokens, treeBuilder );

		// Perform post-processing on the DOM.
		pthingy.postProcessor.doPostProcess( treeBuilder.document );

		// And serialize the result.
		// XXX fix this -- make it a method
		pthingy.out = treeBuilder.document.body.innerHTML;

		// XXX fix this -- make it a method
		pthingy.getWikiDom = function() {
			return JSON.stringify(
				pthingy.DOMConverter.HTMLtoWiki( treeBuilder.document.body ),
				null,
				2
			) + "\n";
		};

		// XXX pull HTML5 htmlparser fixups into this module? Or leave in tests?

	} );

	// Add token transformations.
	var qt = new QuoteTransformer();
	qt.register( this.tokenDispatcher );

	var citeExtension = new Cite();
	citeExtension.register( this.tokenDispatcher );
}

ParseThingy.prototype = {
	buildTree: function ( tokens, treeBuilder ) {
		// Push a body element, just to be sure to have one.
		treeBuilder.processToken( { type: 'TAG', name: 'body' } );
		// Process all tokens.
		for ( var i = 0, length = tokens.length; i < length; i++ ) {
			treeBuilder.processToken( tokens[i] );
		}

		// FIXME HACK: For some reason the end token is sometimes not processed,
		// which normally fixes up the body reference.
		treeBuilder.document.body = treeBuilder.parser
			.document.getElementsByTagName( 'body' )[0];
	}
};

if ( typeof module == "object" ) {
	module.exports.ParseThingy = ParseThingy;
}
modules/parser/parse.js (new file, 49 lines)
@@ -0,0 +1,49 @@
/**
 * Command line wikidom parse utility.
 * Read from STDIN, write to STDOUT.
 */

( function() {

	var ParseThingy = require('./mediawiki.parser.js').ParseThingy,
		optimist = require('optimist');

	var parser = new ParseThingy();

	process.stdin.resume();
	process.stdin.setEncoding('utf8');

	var inputChunks = [];
	process.stdin.on( 'data', function( chunk ) {
		inputChunks.push( chunk );
	} );

	process.stdin.on( 'end', function() {
		var input = inputChunks.join('');
		var output = getOutput( parser, input );
		process.stdout.write( output );
		process.exit(0);
	} );

	/**
	 * @param {ParseThingy} parser
	 * @param {String} input
	 */
	function getOutput( parser, input ) {
		var res = parser.wikiTokenizer.tokenize( input + "\n" );
		if ( res.err ) {
			console.log( 'PARSE FAIL', res.err );
			process.exit(1);
		}

		// Append the end token.
		res.tokens.push( { type: 'END' } );

		parser.tokenDispatcher.transformTokens( res.tokens );

		return parser.getWikiDom();
	}

} )();
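For embedding rather than command-line use, the same pipeline can presumably be driven in-process. Below is an untested sketch distilled from getOutput() in parse.js above; like parse.js, it assumes the dispatcher callback runs synchronously, so parser.out and parser.getWikiDom() are populated by the time transformTokens() returns.

	// Untested sketch, distilled from parse.js above.
	var ParseThingy = require('./mediawiki.parser.js').ParseThingy;

	var parser = new ParseThingy();
	var res = parser.wikiTokenizer.tokenize( "''Hello'' world\n" );
	if ( res.err ) {
		console.log( 'PARSE FAIL', res.err );
	} else {
		// Terminate the token stream, then run transforms, tree building,
		// post-processing and serialization inside the dispatcher callback.
		res.tokens.push( { type: 'END' } );
		parser.tokenDispatcher.transformTokens( res.tokens );
		console.log( parser.out );          // body innerHTML
		console.log( parser.getWikiDom() ); // WikiDom as JSON
	}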
@@ -14,83 +14,6 @@

(function() {

// temporary
function ParseThingy( config ) {
	// create tokenizer
	// Preload the grammar file...
	var peg = fs.readFileSync( config.pegSrcPath, 'utf8' );
	var parserEnv = {};
	//var parserEnv = new MWParserEnvironment({
	//	tagHooks: {
	//		'ref': MWRefTagHook,
	//		'references': MWReferencesTagHook
	//	}
	//});
	this.wikiTokenizer = new PegTokenizer(parserEnv, peg);

	this.postProcessor = new DOMPostProcessor();

	this.DOMConverter = new DOMConverter();

	var pthingy = this;

	// Set up the TokenTransformDispatcher with a callback for the remaining
	// processing.
	this.tokenDispatcher = new TokenTransformDispatcher( function ( tokens ) {

		//console.log("TOKENS: " + JSON.stringify(tokens, null, 2));

		// Create a new tree builder, which also creates a new document.
		var treeBuilder = new FauxHTML5.TreeBuilder();

		// Build a DOM tree from tokens using the HTML tree builder/parser.
		pthingy.buildTree( tokens, treeBuilder );

		// Perform post-processing on the DOM.
		pthingy.postProcessor.doPostProcess(treeBuilder.document);

		// And serialize the result.
		// XXX fix this -- make it a method
		pthingy.out = treeBuilder.document.body.innerHTML;

		// XXX fix this -- make it a method
		pthingy.getWikiDom = function() {
			return JSON.stringify(
				pthingy.DOMConverter.HTMLtoWiki( treeBuilder.document.body ),
				null,
				2
			) + "\n";
		};

	});

	// Add token transformations.
	var qt = new QuoteTransformer();
	qt.register(this.tokenDispatcher);

	var citeExtension = new Cite();
	citeExtension.register(this.tokenDispatcher);
}

ParseThingy.prototype = {
	buildTree: function ( tokens, treeBuilder ) {
		// Push a body element, just to be sure to have one.
		treeBuilder.processToken({type: 'TAG', name: 'body'});
		// Process all tokens.
		for (var i = 0, length = tokens.length; i < length; i++) {
			treeBuilder.processToken(tokens[i]);
		}

		// FIXME HACK: For some reason the end token is sometimes not processed,
		// which normally fixes up the body reference.
		treeBuilder.document.body = treeBuilder.parser
			.document.getElementsByTagName('body')[0];
	}
};

console.log( "Starting up JS parser tests" );
@@ -100,13 +23,11 @@ var fs = require('fs'),
	jsDiff = require('diff'),
	colors = require('colors'),
	util = require( 'util' ),
-	HTML5 = require('html5').HTML5,
+	HTML5 = require('html5').HTML5, //TODO is this fixup for tests only, or part of real parsing...
	PEG = require('pegjs'),
	// Handle options/arguments with optimist module
	optimist = require('optimist');

// @fixme wrap more of this setup in a common module

// track files imported / required
var fileDependencies = [];
@@ -137,19 +58,8 @@ var pj = path.join;

var testWhiteList = require('./parserTests-whitelist.js').testWhiteList;

_import(pj('parser', 'mediawiki.tokenizer.peg.js'), ['PegTokenizer']);
_import(pj('parser', 'mediawiki.parser.environment.js'), ['MWParserEnvironment']);
_import(pj('parser', 'mediawiki.TokenTransformDispatcher.js'), ['TokenTransformDispatcher']);
_import(pj('parser', 'ext.cite.taghook.ref.js'), ['MWRefTagHook']);

_import(pj('parser', 'mediawiki.HTML5TreeBuilder.node.js'), ['FauxHTML5']);
_import(pj('parser', 'mediawiki.DOMPostProcessor.js'), ['DOMPostProcessor']);

_import(pj('parser', 'mediawiki.DOMConverter.js'), ['DOMConverter']);

_import(pj('parser', 'ext.core.QuoteTransformer.js'), ['QuoteTransformer']);

_import(pj('parser', 'ext.Cite.js'), ['Cite']);
_import(pj('parser', 'mediawiki.parser.js'), ['ParseThingy']);

// WikiDom and serializers
//_require(pj('es', 'es.js'));
@@ -653,10 +563,16 @@ ParserTests.prototype.reportSummary = function () {

ParserTests.prototype.main = function () {
	console.log( "Initialisation complete. Now launching tests." );
	//var parserEnv = new MWParserEnvironment({
	//	tagHooks: {
	//		'ref': MWRefTagHook,
	//		'references': MWReferencesTagHook
	//	}
	//});

	// move this config out of here
	var config = {
		pegSrcPath: path.join(basePath, 'parser', 'pegTokenizer.pegjs.txt'),
		parserEnv: {}
	};
	var pThingy = new ParseThingy(config);