// mediawiki-extensions-VisualEditor — modules/parser/mediawiki.tokenizer.peg.js
// Snapshot metadata: 2012-01-09 19:33:49 +00:00, 171 lines, 4.2 KiB, JavaScript

/**
* Tokenizer for wikitext, using PEG.js and a separate PEG grammar file
* (pegTokenizer.pegjs.txt)
*
* Use along with a HTML5TreeBuilder and the DOMPostProcessor(s) for HTML
* output.
*/
var PEG = require('pegjs'),
path = require('path'),
fs = require('fs'),
$ = require('jquery'),
events = require('events');
/**
 * Wikitext tokenizer backed by a PEG.js grammar.
 *
 * Reads the grammar source (pegTokenizer.pegjs.txt, located next to
 * this module) synchronously at construction time and stores it on
 * the instance for later compilation by process().
 * @constructor
 */
function PegTokenizer() {
	var grammarFile = path.join( __dirname, 'pegTokenizer.pegjs.txt' );
	this.src = fs.readFileSync( grammarFile, 'utf8' );
}
// Inherit from EventEmitter. Use Object.create rather than
// `new events.EventEmitter()`: instantiating an emitter as the shared
// prototype would place a single listener table (_events) on the
// prototype, making every PegTokenizer instance share its listeners.
PegTokenizer.prototype = Object.create( events.EventEmitter.prototype );
PegTokenizer.prototype.constructor = PegTokenizer;
// Static cache for the grammar source fetched by initSource() (browser path).
PegTokenizer.src = false;
/**
 * Tokenize a wikitext string.
 *
 * Lazily compiles the PEG grammar into a parser shared via the
 * prototype (parse() is effectively static), normalizes the input to
 * end with a newline, then emits the result.
 *
 * Emits: 'chunk' with the full parser output, then 'end'.
 *
 * @param {string} text Wikitext source to tokenize
 * @returns {{err: Error|undefined}} err is set when parsing failed
 */
PegTokenizer.prototype.process = function( text ) {
	var out, err;
	if ( !this.parser ) {
		// Only create a single parser, as parse() is a static method.
		PegTokenizer.prototype.parser = PEG.buildParser( this.src );
		// XXX: pass a cb into parse() instead, but need to modify pegjs a bit
		// for that.
		// Print the generated parser source for debugging:
		//console.log(this.parser.toSource());
	}
	// The grammar expects newline-terminated input.
	if ( text.charAt( text.length - 1 ) !== "\n" ) {
		text += "\n";
	}
	try {
		out = this.parser.parse( text );
		// Emit tokens here until we get that to work per toplevelblock in the
		// actual tokenizer.
		this.emit( 'chunk', out );
		this.emit( 'end' );
	} catch ( e ) {
		// Surface parse failures through the documented return value;
		// previously `err` was never assigned, so the { err } contract
		// was dead and exceptions escaped to the caller.
		err = e;
	}
	return { err: err };
};
/*****************************************************************************
* LEGACY stuff
*
* This is kept around as a template for the ongoing template expansion work!
* It won't work with the token infrastructure.
*/
/**
* @param {object} tree
* @param {function(tree, error)} callback
*/
/**
 * LEGACY: recursively expand template nodes in a parse tree.
 *
 * @param {Object|string} tree Parse tree node, or a plain-text leaf
 * @param {function(tree, error)} callback Invoked with the expanded node
 */
PegTokenizer.prototype.expandTree = function( tree, callback ) {
	var self = this;
	var subParseArray = function( listOfTrees ) {
		var content = [];
		$.each( listOfTrees, function( i, subtree ) {
			self.expandTree( subtree, function( substr, err ) {
				// Collect the expanded child; pushing `tree` here (as the
				// old code did) filled the array with copies of the parent.
				content.push( substr );
			});
		});
		// NOTE(review): assumes the child callbacks fire synchronously;
		// template children are expanded asynchronously and would be
		// missed here — confirm against callers.
		return content;
	};
	if ( typeof tree === "string" ) {
		// Text leaves need no expansion.
		callback( tree );
		return;
	}
	if ( tree.type === 'template' ) {
		// expand a template node!
		// Resolve a possibly relative link
		var templateName = this.env.resolveTitle( tree.target, 'Template' );
		this.env.fetchTemplate( tree.target, tree.params || {}, function( templateSrc, error ) {
			// @fixme should pre-parse/cache these too?
			self.parseToTree( templateSrc, function( templateTree, error ) {
				if ( error ) {
					// Fetch/parse failed: emit a placeholder that links to
					// the (possibly missing) template page.
					callback({
						type: 'placeholder',
						orig: tree,
						content: [
							{
								// @fixme broken link?
								type: 'link',
								target: templateName
							}
						]
					});
				} else {
					callback({
						type: 'placeholder',
						orig: tree,
						content: self.env.expandTemplateArgs( templateTree, tree.params )
					});
				}
			});
		} );
		// Wait for async...
		return;
	}
	// Shallow-copy the node. Note: the previous `$.extend( tree )` form
	// passes a single argument, which makes jQuery merge `tree` into the
	// jQuery object itself and return `$` — not a copy of the node.
	var out = $.extend( {}, tree ); // @fixme prefer a deep copy?
	if ( tree.content ) {
		out.content = subParseArray( tree.content );
	}
	callback( out );
};
/**
 * Ensure the static grammar source (PegTokenizer.src) is loaded, then
 * invoke the callback. Browser-only: fetches either an on-wiki grammar
 * page (when parserPlaygroundPegPage is defined) through the API, or
 * the bundled grammar file from the extension assets.
 *
 * @param {Function} callback Called once the source is available
 */
PegTokenizer.prototype.initSource = function( callback ) {
	if ( PegTokenizer.src ) {
		// Already cached.
		callback();
		return;
	}
	if ( typeof parserPlaygroundPegPage !== 'undefined' ) {
		// Fetch the grammar from an on-wiki page via the action API.
		// (The old code passed a stray 'json' second argument to $.ajax,
		// which jQuery ignores when the first argument is a settings
		// object — removed.)
		$.ajax({
			url: wgScriptPath + '/api' + wgScriptExtension,
			data: {
				format: 'json',
				action: 'query',
				prop: 'revisions',
				rvprop: 'content',
				titles: parserPlaygroundPegPage
			},
			success: function( data, xhr ) {
				$.each( data.query.pages, function( i, page ) {
					if ( page.revisions && page.revisions.length ) {
						PegTokenizer.src = page.revisions[0]['*'];
					}
				});
				// NOTE(review): callback fires even when no revision was
				// found, leaving PegTokenizer.src === false — confirm
				// callers tolerate that.
				callback();
			},
			dataType: 'json',
			cache: false
		});
	} else {
		// Fetch the bundled grammar file from the extension assets path.
		$.ajax({
			url: mw.config.get( 'wgParserPlaygroundAssetsPath', mw.config.get( 'wgExtensionAssetsPath' ) ) + '/ParserPlayground/modules/pegParser.pegjs.txt',
			success: function( data ) {
				PegTokenizer.src = data;
				callback();
			},
			dataType: 'text',
			cache: false
		});
	}
};
// Export for CommonJS (Node); skipped in plain browser contexts where
// `module` is undefined. Use strict equality — typeof always yields a
// string, so `==` gains nothing and violates the project lint rules.
if ( typeof module === "object" ) {
	module.exports.PegTokenizer = PegTokenizer;
}