2011-12-28 01:37:06 +00:00
|
|
|
/**
 * Simple parser class. Should eventually offer plenty of options for
 * observing the individual parse stages (or use events instead).
 *
 * @author Gabriel Wicke <gwicke@wikimedia.org>
 * @author Neil Kandalgaonkar <neilk@wikimedia.org>
 */
|
|
|
|
|
|
|
|
var fs = require('fs'),
|
|
|
|
path = require('path'),
|
|
|
|
PegTokenizer = require('./mediawiki.tokenizer.peg.js').PegTokenizer,
|
|
|
|
TokenTransformDispatcher = require('./mediawiki.TokenTransformDispatcher.js').TokenTransformDispatcher,
|
|
|
|
DOMPostProcessor = require('./mediawiki.DOMPostProcessor.js').DOMPostProcessor,
|
|
|
|
DOMConverter = require('./mediawiki.DOMConverter.js').DOMConverter,
|
|
|
|
QuoteTransformer = require('./ext.core.QuoteTransformer.js').QuoteTransformer,
|
|
|
|
Cite = require('./ext.Cite.js').Cite,
|
|
|
|
MWRefTagHook = require('./ext.cite.taghook.ref.js').MWRefTagHook,
|
|
|
|
FauxHTML5 = require('./mediawiki.HTML5TreeBuilder.node.js').FauxHTML5;
|
|
|
|
|
|
|
|
/**
 * Parser pipeline object. The constructor creates the tokenizer, the DOM
 * post-processor, the DOM converter and a token transform dispatcher, then
 * registers the quote and cite token transformations on that dispatcher.
 *
 * XXX: move the actual parsing to a separate method and only perform the
 * pipeline setup in the constructor!
 *
 * @constructor
 * @param {Object} [config] Optional configuration. Falls back to an empty
 *     object when falsy; it is not read anywhere else in this constructor yet.
 */
function ParseThingy( config ) {
    var self = this,
        quoteTransformer,
        cite;

    config = config || {};

    this.wikiTokenizer = new PegTokenizer();
    this.postProcessor = new DOMPostProcessor();
    this.DOMConverter = new DOMConverter();

    /**
     * Dispatcher callback: turns the transformed tokens into a DOM tree,
     * post-processes it and publishes the result on the parser instance.
     *
     * XXX: convert to an event listener (listening for token chunks from the
     * token transformer) that emits an additional 'done' event after
     * processing the 'end' token.
     *
     * @param {Array} tokens Tokens handed over by the dispatcher.
     */
    function handleTokens( tokens ) {
        //console.log("TOKENS: " + JSON.stringify(tokens, null, 2));

        // A fresh tree builder also means a fresh document.
        // XXX: implicitly clean up old state after processing the end token,
        // so that the tree builder can be reused.
        var treeBuilder = new FauxHTML5.TreeBuilder();

        // Feed the tokens through the HTML tree builder/parser.
        // XXX: convert to event listener (token chunks from
        // TokenTransformDispatcher) and event emitter (DOM tree to
        // DOMPostProcessor).
        self.buildTree( tokens, treeBuilder );

        // Post-process the resulting DOM.
        // XXX: convert to event listener (listening on the treeBuilder
        // 'finished' event).
        self.postProcessor.doPostProcess( treeBuilder.document );

        // FIXME: move HTML serialization to separate pipeline!
        self.document = treeBuilder.document;

        // XXX: emit event with result
        // Serializes the converted body of this run's document as
        // pretty-printed JSON followed by a newline.
        self.getWikiDom = function () {
            var wikiDom = self.DOMConverter.HTMLtoWiki( treeBuilder.document.body );
            return JSON.stringify( wikiDom, null, 2 ) + "\n";
        };
    }

    // Set up the TokenTransformDispatcher with a callback for the remaining
    // processing.
    // XXX: convert to event listener (listening for token chunks from the
    // tokenizer) and event emitter (emitting token chunks).
    // XXX: A parser environment and configuration will be added here to the
    // token transform dispatcher.
    this.tokenDispatcher = new TokenTransformDispatcher( handleTokens );

    // Add token transformations.
    quoteTransformer = new QuoteTransformer();
    quoteTransformer.register( this.tokenDispatcher );

    cite = new Cite();
    cite.register( this.tokenDispatcher );
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Feed a list of tokens into an HTML tree builder.
 *
 * The method is attached to the existing prototype object instead of
 * replacing the prototype with a fresh object literal, so instances keep the
 * default `constructor` reference back to ParseThingy.
 *
 * XXX: This will be moved to the treeBuilder event listener callback, where
 * it will process each received chunk.
 *
 * @param {Array} tokens Tokens passed, in order, to treeBuilder.processToken.
 * @param {Object} treeBuilder Tree builder providing `processToken()`,
 *     `document` and `parser.document`.
 */
ParseThingy.prototype.buildTree = function buildTree( tokens, treeBuilder ) {
    // Push a body element, just to be sure to have one.
    treeBuilder.processToken({type: 'TAG', name: 'body'});

    // Process all tokens.
    for (var i = 0, length = tokens.length; i < length; i++) {
        treeBuilder.processToken(tokens[i]);
    }

    // FIXME HACK: For some reason the end token is not processed sometimes,
    // which normally fixes the body reference up. Re-resolve the body element
    // from the parser's document explicitly.
    treeBuilder.document.body = treeBuilder.parser
        .document.getElementsByTagName('body')[0];
};
|
|
|
|
|
|
|
|
// Export the parser class when a CommonJS-style `module` object is available.
if ( typeof module === 'object' ) {
    module.exports.ParseThingy = ParseThingy;
}
|
|
|
|
|