2011-12-28 01:37:06 +00:00
|
|
|
/**
 * Simple parser class. Should have lots of options for observing parse stages (or, use events).
 *
 * @author Gabriel Wicke <gwicke@wikimedia.org>
 * @author Neil Kandalgaonkar <neilk@wikimedia.org>
 */
|
|
|
|
|
2012-01-04 08:39:45 +00:00
|
|
|
// make this global for now
|
|
|
|
// XXX: figure out a way to get away without a global for PEG actions!
|
|
|
|
$ = require('jquery');
|
|
|
|
|
2011-12-28 01:37:06 +00:00
|
|
|
var fs = require('fs'),
|
|
|
|
path = require('path'),
|
|
|
|
PegTokenizer = require('./mediawiki.tokenizer.peg.js').PegTokenizer,
|
|
|
|
TokenTransformDispatcher = require('./mediawiki.TokenTransformDispatcher.js').TokenTransformDispatcher,
|
|
|
|
QuoteTransformer = require('./ext.core.QuoteTransformer.js').QuoteTransformer,
|
|
|
|
Cite = require('./ext.Cite.js').Cite,
|
2012-01-03 18:44:31 +00:00
|
|
|
FauxHTML5 = require('./mediawiki.HTML5TreeBuilder.node.js').FauxHTML5,
|
|
|
|
DOMPostProcessor = require('./mediawiki.DOMPostProcessor.js').DOMPostProcessor,
|
|
|
|
DOMConverter = require('./mediawiki.DOMConverter.js').DOMConverter;
|
2011-12-28 01:37:06 +00:00
|
|
|
|
2012-01-04 08:39:45 +00:00
|
|
|
/**
 * Set up a simple parser pipeline.
 *
 * Wiring done here: the token dispatcher subscribes to the tokenizer, and the
 * tree builder subscribes to the token dispatcher. A DOM post-processor and a
 * DOM converter are created as well, but they are only invoked from parse()
 * and getWikiDom().
 *
 * @constructor
 * @param {Object} [config] Optional settings. Kept on the instance as
 *   `this.config`; an empty object is used when the argument is omitted.
 */
function ParserPipeline( config ) {
	// Keep the options reachable instead of normalising and then dropping them.
	this.config = config || {};

	this.wikiTokenizer = new PegTokenizer();

	this.tokenDispatcher = new TokenTransformDispatcher();

	// Add token transformations..
	var qt = new QuoteTransformer();
	qt.register( this.tokenDispatcher );

	// The Cite extension is not registered at the moment:
	//var citeExtension = new Cite();
	//citeExtension.register(this.tokenDispatcher);

	this.tokenDispatcher.subscribeToTokenEmitter( this.wikiTokenizer );

	// Create a new tree builder, which also creates a new document.
	// XXX: implicitly clean up old state after processing end token, so
	// that we can reuse the tree builder.
	// XXX: convert to event listener listening for token chunks from the
	// token transformer and emitting an additional 'done' event after
	// processing the 'end' token.
	this.treeBuilder = new FauxHTML5.TreeBuilder();
	this.treeBuilder.subscribeToTokenEmitter( this.tokenDispatcher );

	// Prepare these two, but only call them from parse and getWikiDom for
	// now. These will be called in a callback later, when the full pipeline
	// is used asynchronously.
	this.postProcessor = new DOMPostProcessor();

	this.DOMConverter = new DOMConverter();
}
|
|
|
|
|
2012-01-04 08:39:45 +00:00
|
|
|
/**
 * Run the pipeline synchronously on the given input.
 *
 * Feeds the tokenizer, picks up the tree builder's document, stores it in
 * `this.document` and runs the DOM post-processor on it.
 *
 * @param {string} text Input handed unchanged to `wikiTokenizer.tokenize()`
 *   (presumably wikitext source; the tokenizer defines what is accepted).
 * @returns {Object} The post-processed document; the same object is also
 *   available as `this.document`.
 */
ParserPipeline.prototype.parse = function ( text ) {
	// Set the pipeline in motion by feeding the tokenizer
	this.wikiTokenizer.tokenize( text );

	// XXX: Convert parse to an async pipeline as well!
	// The remaining processing below will have to happen in a callback,
	// triggered on the treeBuilder 'end' event, followed by an event emission
	// or callback calling instead of returning.
	this.document = this.treeBuilder.document;

	//console.log(this.document.body.innerHTML);

	// Perform synchronous post-processing on DOM.
	// XXX: convert to event listener (listening on treeBuilder 'end'
	// event)
	this.postProcessor.doPostProcess( this.document );

	// Hand the result back so callers do not have to read this.document.
	return this.document;
};
|
|
|
|
|
2012-01-04 08:39:45 +00:00
|
|
|
/**
 * Serialize the current document as WikiDom JSON.
 *
 * Converts `this.document.body` with `DOMConverter.HTMLtoWiki()` and
 * stringifies the result with a two-space indent. `this.document` is set by
 * parse(), so parse() has to run first; otherwise the property access on the
 * missing document throws.
 *
 * @returns {string} Pretty-printed JSON of the converter's output.
 */
ParserPipeline.prototype.getWikiDom = function () {
	var wikiDom = this.DOMConverter.HTMLtoWiki( this.document.body );
	return JSON.stringify( wikiDom, null, 2 );
};
|
|
|
|
|
|
|
|
|
2011-12-28 01:37:06 +00:00
|
|
|
// Export the constructor when running under CommonJS (e.g. node). The guard
// skips the assignment wherever `module` or `module.exports` is missing, so
// loading the file in such an environment does not throw here.
if ( typeof module === "object" && module && module.exports ) {
	module.exports.ParserPipeline = ParserPipeline;
}
|