2011-12-28 01:37:06 +00:00
|
|
|
/**
|
2012-03-07 18:42:26 +00:00
|
|
|
* This module assembles parser pipelines from parser stages with
|
|
|
|
 * asynchronous communication between stages based on events. Apart from the
|
|
|
|
* default pipeline which converts WikiText to HTML DOM, it also provides
|
|
|
|
* sub-pipelines for the processing of template transclusions.
|
2011-12-28 01:37:06 +00:00
|
|
|
*
|
2012-03-07 18:42:26 +00:00
|
|
|
* See http://www.mediawiki.org/wiki/Parsoid and
|
|
|
|
* http://www.mediawiki.org/wiki/Parsoid/Token_stream_transformations
|
|
|
|
* for illustrations of the pipeline architecture.
|
2011-12-28 01:37:06 +00:00
|
|
|
*
|
|
|
|
* @author Gabriel Wicke <gwicke@wikimedia.org>
|
|
|
|
* @author Neil Kandalgaonkar <neilk@wikimedia.org>
|
|
|
|
*/
|
|
|
|
|
2012-01-04 08:39:45 +00:00
|
|
|
// make this global for now
|
|
|
|
// XXX: figure out a way to get away without a global for PEG actions!
|
|
|
|
$ = require('jquery');
|
2012-01-11 00:05:51 +00:00
|
|
|
var events = require( 'events' );
|
2012-01-04 08:39:45 +00:00
|
|
|
|
2011-12-28 01:37:06 +00:00
|
|
|
var fs = require('fs'),
|
|
|
|
path = require('path'),
|
|
|
|
PegTokenizer = require('./mediawiki.tokenizer.peg.js').PegTokenizer,
|
2012-01-17 18:22:10 +00:00
|
|
|
TokenTransformManager = require('./mediawiki.TokenTransformManager.js'),
|
2012-04-25 14:35:59 +00:00
|
|
|
SyncTokenTransformManager = TokenTransformManager.SyncTokenTransformManager,
|
|
|
|
AsyncTokenTransformManager = TokenTransformManager.AsyncTokenTransformManager,
|
2012-01-20 01:46:16 +00:00
|
|
|
|
2012-01-20 22:02:23 +00:00
|
|
|
NoIncludeOnly = require('./ext.core.NoIncludeOnly.js'),
|
|
|
|
IncludeOnly = NoIncludeOnly.IncludeOnly,
|
|
|
|
NoInclude = NoIncludeOnly.NoInclude,
|
2012-02-28 13:24:35 +00:00
|
|
|
OnlyInclude = NoIncludeOnly.OnlyInclude,
|
2011-12-28 01:37:06 +00:00
|
|
|
QuoteTransformer = require('./ext.core.QuoteTransformer.js').QuoteTransformer,
|
2012-01-17 18:22:10 +00:00
|
|
|
PostExpandParagraphHandler = require('./ext.core.PostExpandParagraphHandler.js')
|
|
|
|
.PostExpandParagraphHandler,
|
2012-01-18 01:42:56 +00:00
|
|
|
Sanitizer = require('./ext.core.Sanitizer.js').Sanitizer,
|
2012-01-11 19:48:49 +00:00
|
|
|
TemplateHandler = require('./ext.core.TemplateHandler.js').TemplateHandler,
|
2012-02-09 13:44:20 +00:00
|
|
|
AttributeExpander = require('./ext.core.AttributeExpander.js').AttributeExpander,
|
2012-03-05 15:34:27 +00:00
|
|
|
LinkHandler = require('./ext.core.LinkHandler.js'),
|
|
|
|
WikiLinkHandler = LinkHandler.WikiLinkHandler,
|
|
|
|
ExternalLinkHandler = LinkHandler.ExternalLinkHandler,
|
2011-12-28 01:37:06 +00:00
|
|
|
Cite = require('./ext.Cite.js').Cite,
|
2012-04-01 02:08:54 +00:00
|
|
|
BehaviorSwitchHandler = require('./ext.core.BehaviorSwitchHandler.js').BehaviorSwitchHandler,
|
2012-04-25 14:35:59 +00:00
|
|
|
TreeBuilder = require('./mediawiki.HTML5TreeBuilder.node.js')
|
|
|
|
.FauxHTML5.TreeBuilder,
|
2012-01-03 18:44:31 +00:00
|
|
|
DOMPostProcessor = require('./mediawiki.DOMPostProcessor.js').DOMPostProcessor,
|
2012-03-26 19:40:56 +00:00
|
|
|
DOMConverter = require('./mediawiki.DOMConverter.js').DOMConverter,
|
|
|
|
ConvertDOMToLM = require('./mediawiki.LinearModelConverter.js').ConvertDOMToLM;
|
2011-12-28 01:37:06 +00:00
|
|
|
|
2012-04-25 14:35:59 +00:00
|
|
|
|
|
|
|
/**
 * Factory for parser pipelines.
 *
 * Holds the shared parser environment handed to every stage and a per-type
 * cache of reusable (sub-)pipelines.
 *
 * @constructor
 * @param {Object} env The shared parser environment.
 */
function ParserPipelineFactory ( env ) {
	this.env = env;
	// Lazily-filled cache: pipeline type -> list of idle pipelines.
	this.pipelineCache = {};
}
|
|
|
|
|
|
|
|
/**
 * Recipe for parser pipelines and sub-pipelines, depending on input types.
 *
 * Token stream transformations to register by type and per phase. The
 * possible ranks for individual transformation registrations are [0,1)
 * (excluding 1.0) for sync01, [1,2) for async12 and [2,3) for sync23.
 *
 * Each recipe entry is either:
 *   - a String naming another recipe (expanded recursively by
 *     makePipeline), or
 *   - an Array [ StageConstructor, extraArgs, optionalTransformList ],
 *     where extraArgs are appended after the standard (env, isInclude,
 *     factory) constructor arguments and each member of
 *     optionalTransformList is instantiated with (stage, isInclude).
 *
 * Should perhaps be moved to mediawiki.parser.environment.js, so that all
 * configuration can be found in a single place.
 */
ParserPipelineFactory.prototype.recipes = {
	// The full wikitext pipeline
	'text/wiki/full': [
		// Input pipeline including the tokenizer
		'text/wiki',
		// Final synchronous token transforms and DOM building / processing
		'tokens/expanded'
	],

	// A pipeline from wikitext to expanded tokens. The input pipeline for
	// wikitext.
	'text/wiki': [
		[ PegTokenizer, [] ],
		'tokens/wiki'
	],

	// Synchronous per-input and async token stream transformations. Produces
	// a fully expanded token stream ready for consumption by the
	// tokens/expanded pipeline.
	'tokens/wiki': [
		// Synchronous in-order per input
		[
			SyncTokenTransformManager,
			[ 1, 'tokens/wiki' ],
			[
				OnlyInclude,
				IncludeOnly,
				NoInclude,
				BehaviorSwitchHandler
				// Insert TokenCollectors for extensions here (don't expand
				// templates in extension contents); wrap collected tokens in
				// special extension token.
				/* Extension1, */
				/* Extension2, */
			]
		],
		/*
		 * Asynchronous out-of-order per input. Each async transform can only
		 * operate on a single input token, but can emit multiple output
		 * tokens. If multiple tokens need to be collected per-input, then a
		 * separate collection transform in sync01 can be used to wrap the
		 * collected tokens into a single one later processed in an async12
		 * transform.
		 */
		[
			AsyncTokenTransformManager,
			[ 2, 'tokens/wiki' ],
			[
				TemplateHandler,
				// Expand attributes after templates to avoid expanding unused branches
				AttributeExpander,
				WikiLinkHandler,
				ExternalLinkHandler
				/* ExtensionHandler1, */
				/* ExtensionHandler2, */
			]
		]
	],

	// Final stages of main pipeline, operating on fully expanded tokens of
	// potentially mixed origin.
	'tokens/expanded': [
		// Synchronous in-order on fully expanded token stream (including
		// expanded templates etc). In order to support mixed input (from
		// wikitext and plain HTML, say) all applicable transforms need to be
		// included here. Input-specific token types avoid any runtime
		// overhead for unused transforms.
		[
			SyncTokenTransformManager,
			[ 3, 'tokens/expanded' ],
			[
				// text/wiki-specific tokens
				QuoteTransformer,
				PostExpandParagraphHandler,
				/* Cite, */
				/* ListHandler, */
				Sanitizer
			]
		],

		// Build a tree out of the fully processed token stream
		[ TreeBuilder, [] ],

		/**
		 * Final processing on the HTML DOM.
		 */

		/* Generic DOM transformer.
		 * This currently performs minor tree-dependent clean up like wrapping
		 * plain text in paragraphs. For HTML output, it would also be configured
		 * to perform more aggressive nesting cleanup.
		 */
		[ DOMPostProcessor, [] ]
	]
};
|
|
|
|
|
|
|
|
/**
 * Generic pipeline creation from the above recipes.
 *
 * @param {String} type Recipe name, e.g. 'text/wiki' or 'tokens/expanded'.
 * @param {Boolean} isInclude Whether this pipeline processes transcluded
 *   (included) content rather than top-level input.
 * @param {String} [cacheType] Cache key; when set, the finished pipeline
 *   hands itself back to the factory cache (via returnPipeline) on 'end'.
 * @returns {ParserPipeline} Pipeline wrapping the first and last stages.
 * @throws {Error} If no recipe is registered for the given type.
 */
ParserPipelineFactory.prototype.makePipeline = function( type, isInclude, cacheType ) {
	var recipe = this.recipes[type];
	if ( ! recipe ) {
		console.trace();
		// Throw a real Error (not a bare string) so callers get a stack trace.
		throw new Error( 'Error while trying to construct pipeline for ' + type );
	}
	var stages = [];
	for ( var i = 0, l = recipe.length; i < l; i++ ) {
		// create the stage
		var stageData = recipe[i],
			stage;

		if ( stageData.constructor === String ) {
			// Points to another subpipeline, get it recursively
			stage = this.makePipeline( stageData, isInclude );
		} else {
			// [ Constructor, extraArgs, optionalTransforms ]: allocate the
			// stage first so extra recipe arguments can be appended after the
			// standard (env, isInclude, factory) triple.
			stage = Object.create( stageData[0].prototype );
			// call the constructor
			stageData[0].apply( stage, [ this.env, isInclude, this ].concat( stageData[1] ) );
			if ( stageData.length >= 3 ) {
				// Create (and implicitly register) transforms
				var transforms = stageData[2];
				for ( var t = 0; t < transforms.length; t++ ) {
					new transforms[t]( stage, isInclude );
				}
			}
		}

		// connect with previous stage
		if ( i ) {
			stage.addListenersOn( stages[i-1] );
		}
		stages.push( stage );
	}
	//console.warn( 'stages' + stages + JSON.stringify( stages ) );
	return new ParserPipeline(
			stages[0],
			stages[stages.length - 1],
			// Only cached pipelines get an end-of-processing callback.
			cacheType ? this.returnPipeline.bind( this, cacheType )
						: null
			);
};
|
|
|
|
|
|
|
|
/**
 * Get a subpipeline (not the top-level one) of a given type.
 *
 * Subpipelines are cached as they are frequently created.
 *
 * @param {String} type Recipe name of the pipeline.
 * @param {Boolean} [isInclude=true] Whether transcluded content is processed.
 * @returns {ParserPipeline} A cached pipeline when available, else a new one.
 */
ParserPipelineFactory.prototype.getPipeline = function ( type, isInclude ) {
	// default to include
	if ( isInclude === undefined ) {
		isInclude = true;
	}
	// Top-level (non-include) pipelines are cached under a separate key.
	var cacheType = isInclude ? type : type + '::noInclude';
	var cached = this.pipelineCache[cacheType];
	if ( ! cached ) {
		cached = this.pipelineCache[cacheType] = [];
	}
	if ( cached.length ) {
		//console.warn( JSON.stringify( this.pipelineCache[cacheType] ));
		return cached.pop();
	}
	return this.makePipeline( type, isInclude, cacheType );
};
|
|
|
|
|
|
|
|
/**
 * Callback called by a pipeline at the end of its processing. Returns the
 * pipeline to the cache.
 *
 * @param {String} type Cache key under which to store the pipeline.
 * @param {ParserPipeline} pipe The pipeline being recycled.
 */
ParserPipelineFactory.prototype.returnPipeline = function ( type, pipe ) {
	// Strip per-use listeners before the pipeline can be handed out again.
	var perUseEvents = [ 'end', 'chunk' ];
	for ( var i = 0; i < perUseEvents.length; i++ ) {
		pipe.removeAllListeners( perUseEvents[i] );
	}
	var cache = this.pipelineCache[type];
	// Bound the cache size so idle pipelines are not hoarded.
	if ( cache.length < 5 ) {
		cache.push( pipe );
	}
};
|
|
|
|
|
|
|
|
|
|
|
|
/******************** ParserPipeline ****************************/
|
|
|
|
|
|
|
|
/**
 * Wrap some stages into a pipeline. The last member of the pipeline is
 * supposed to emit events, while the first is supposed to support a process()
 * method that sets the pipeline in motion.
 *
 * @constructor
 * @param {Object} first First stage; must implement process().
 * @param {Object} last Last stage; must be an event emitter.
 * @param {Function} [returnToCacheCB] Called with this pipeline when the
 *   'end' event fires, so the owning factory can recycle it.
 */
function ParserPipeline ( first, last, returnToCacheCB ) {
	this.first = first;
	this.last = last;

	if ( returnToCacheCB ) {
		var pipeline = this;
		this.returnToCacheCB = function () {
			returnToCacheCB( pipeline );
		};

		// Return the pipeline to the cache once processing has finished.
		this.last.addListener( 'end', this.returnToCacheCB );
	}
}

/**
 * Feed input tokens to the first pipeline stage.
 */
ParserPipeline.prototype.process = function ( input, key ) {
	return this.first.process( input, key );
};

/**
 * Set the frame on the last pipeline stage (normally the
 * AsyncTokenTransformManager).
 */
ParserPipeline.prototype.setFrame = function ( frame, title, args ) {
	return this.last.setFrame( frame, title, args );
};

/**
 * Register the first pipeline stage with the last stage from a separate
 * pipeline.
 */
ParserPipeline.prototype.addListenersOn = function ( stage ) {
	return this.first.addListenersOn( stage );
};

// Forward the plain EventEmitter API to this.last; each forwarder simply
// delegates the call (and its return value) unchanged.
[ 'on', 'once', 'addListener', 'removeListener',
  'setMaxListeners', 'listeners' ].forEach( function ( method ) {
	ParserPipeline.prototype[method] = function () {
		return this.last[method].apply( this.last, arguments );
	};
} );

/**
 * Remove listeners from the last stage. The 'end' event is special-cased:
 * after clearing it, the cache-return callback (if any) is re-registered so
 * the pipeline can still be recycled.
 */
ParserPipeline.prototype.removeAllListeners = function ( event ) {
	if ( event !== 'end' ) {
		return this.last.removeAllListeners( event );
	}
	this.last.removeAllListeners( 'end' );
	// now re-add the cache callback
	if ( this.returnToCacheCB ) {
		this.last.addListener( 'end', this.returnToCacheCB );
	}
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/********************* Old stuff ****************************/
|
|
|
|
|
2012-01-09 19:33:49 +00:00
|
|
|
/**
 * Set up a simple parser pipeline. There will be a single pipeline overall,
 * but there can be multiple sub-pipelines for template expansions etc, which
 * in turn differ by input type. The main input type will be fixed at
 * construction time though.
 *
 * NOTE(review): This legacy constructor appears broken: its parameter list is
 * ( first, last, returnToCacheCB ), but the body reads `inputType` and `env`,
 * which are neither parameters nor visible locals, and `FauxHTML5` is not
 * imported in this file (the import binds `TreeBuilder` instead). It looks
 * like leftover "Old stuff" kept for reference — confirm before using.
 *
 * @class
 * @constructor
 * @param {Object} Environment.
 */
function OldParserPipeline( first, last, returnToCacheCB ) {

	// NOTE(review): `inputType` is undeclared here — see class note above.
	if ( ! inputType ) {
		// Actually the only one supported for now, but could also create
		// others for serialized tokens etc
		inputType = 'text/wiki';
	}
	this.inputType = inputType;

	// Pass in a full-fledged environment based on
	// mediawiki.parser.environment.js.
	// NOTE(review): `env` is also undeclared in this scope.
	if ( !env ) {
		this.env = {};
	} else {
		this.env = env;
	}

	// set up a sub-pipeline cache
	this.pipelineCache = {};
	this.pipelineCache[this.inputType] = {
			'input-toplevel': [],
			'input-include': [],
			'attribute-include': [],
			'attribute-toplevel': []
	};

	// Create an input pipeline for the given input type.
	this.inputPipeline = this.makeInputPipeline ( inputType, {}, false );

	// Mark this pipeline as the top-level input pipeline, so that it is not
	// cached and its listeners removed
	this.inputPipeline.atTopLevel = true;
	this.inputPipeline.last.atTopLevel = true;

	// Final synchronous transform phase (rank 3.0) on fully expanded tokens.
	this.tokenPostProcessor = new TokenTransformManager
					.SyncTokenTransformManager ( env, inputType, 3.0, false );
	this.tokenPostProcessor.listenForTokensFrom ( this.inputPipeline );

	// Add token transformations..
	this._addTransformers( 'tokens/expanded', 'sync23', this.tokenPostProcessor, false );

	/**
	 * The tree builder creates a DOM tree from the token soup emitted from
	 * the TokenTransformDispatcher.
	 */
	// NOTE(review): `FauxHTML5` is not in scope in this file.
	this.treeBuilder = new FauxHTML5.TreeBuilder( this.env );
	this.treeBuilder.listenForTokensFrom( this.tokenPostProcessor );
	//this.tokenPostProcessor.on('chunk', function( c ) {
	//	console.warn( JSON.stringify( c, null, 2 ));
	//} );

	this.postProcessor = new DOMPostProcessor();
	this.postProcessor.listenForDocumentFrom( this.treeBuilder );

	/**
	 * Conversion from HTML DOM to WikiDOM. This is not needed if plain HTML
	 * DOM output is desired, so it should only be registered to the
	 * DOMPostProcessor 'document' event if WikiDom output is requested. We
	 * could emit events for 'dom', 'wikidom', 'html' and so on, but only
	 * actually set up the needed pipeline stages if a listener is registered.
	 * Overriding the addListener method should make this possible.
	 */
	this.DOMConverter = new DOMConverter();

	// Lame version for now, see above for an idea for the external async
	// interface and pipeline setup
	this.postProcessor.addListener( 'document', this.forwardDocument.bind( this ) );

}

// Inherit from EventEmitter
OldParserPipeline.prototype = new events.EventEmitter();
OldParserPipeline.prototype.constructor = OldParserPipeline;
|
2012-02-20 11:07:21 +00:00
|
|
|
|
|
|
|
|
|
|
|
/**
 * Add all transformers to a token transform manager for a given input type
 * and phase.
 *
 * NOTE(review): `this._transformers` is not assigned anywhere in this file;
 * the try/catch below papers over the resulting TypeError — confirm where
 * the registry is meant to come from.
 *
 * @param {String} type Input type, e.g. 'text/wiki'.
 * @param {String} phase Transform phase: 'sync01', 'async12' or 'sync23'.
 * @param {Object} manager Token transform manager the transformers attach to.
 * @param {Boolean} isInclude Whether transcluded content is being processed.
 */
OldParserPipeline.prototype._addTransformers = function ( type, phase, manager, isInclude )
{
	var registry;
	try {
		registry = this._transformers[type][phase];
	} catch ( e ) {
		// Fall back to an empty list rather than aborting pipeline setup.
		console.warn( 'Error while looking for token transformers for ' +
				type + ' and phase ' + phase );
		registry = [];
	}
	var count = registry.length;
	for ( var idx = 0; idx < count; idx++ ) {
		// Transformers register themselves on the manager in their constructor.
		new registry[idx]( manager, isInclude );
	}
};
|
|
|
|
|
|
|
|
|
2012-01-09 19:33:49 +00:00
|
|
|
/**
 * Factory method for the input (up to async token transforms / phase two)
 * parts of the parser pipeline.
 *
 * NOTE(review): `CachedTokenPipeline` (used below) is neither defined nor
 * imported in this file — this legacy path cannot run as-is; verify before
 * reviving it.
 *
 * @method
 * @param {String} Input type. Try 'text/wiki'.
 * @param {Object} Expanded template arguments to pass to the
 * AsyncTokenTransformManager.
 * @returns {Object} { first: <first stage>, last: AsyncTokenTransformManager }
 * First stage is supposed to implement a process() function
 * that can accept all input at once. The wikitext tokenizer for example
 * accepts the wiki text this way. The last stage of the input pipeline is
 * always an AsyncTokenTransformManager, which emits its output in events.
 */
OldParserPipeline.prototype.makeInputPipeline = function ( inputType, args, isInclude ) {
	// Include and top-level pipelines are cached in separate slots.
	var pipelinePart = isInclude ? 'input-include' : 'input-toplevel';
	switch ( inputType ) {
		case 'text/wiki':
			//console.warn( 'makeInputPipeline ' + JSON.stringify( args ) );
			if ( this.pipelineCache['text/wiki'][pipelinePart].length ) {
				// Reuse a cached pipeline; only the template arguments differ.
				var pipe = this.pipelineCache['text/wiki'][pipelinePart].pop();
				pipe.last.args = args;
				return pipe;
			} else {
				var wikiTokenizer = new PegTokenizer( this.env, isInclude );

				/**
				 * Token stream transformations.
				 * This is where all the wiki-specific functionality is implemented.
				 * See
				 * https://www.mediawiki.org/wiki/Future/Parser_development/Token_stream_transformations
				 */
				// Synchronous phase-one manager fed directly by the tokenizer.
				var tokenPreProcessor = new TokenTransformManager
							.SyncTokenTransformManager ( this.env, 'text/wiki', 1, isInclude );
				tokenPreProcessor.listenForTokensFrom ( wikiTokenizer );

				this._addTransformers( 'text/wiki', 'sync01',
						tokenPreProcessor, isInclude );

				// Async phase-two manager; the two factory callbacks let it
				// spawn sub-pipelines for templates and attributes.
				var tokenExpander = new TokenTransformManager.AsyncTokenTransformManager (
						{
							'input': this.makeInputPipeline.bind( this ),
							'attributes': this.makeAttributePipeline.bind( this )
						},
						args, this.env, inputType, 2.0, isInclude
						);

				// Register template expansion extension
				this._addTransformers( 'text/wiki', 'async12',
						tokenExpander, isInclude );

				tokenExpander.listenForTokensFrom ( tokenPreProcessor );
				// XXX: hack.
				tokenExpander.inputType = inputType;
				tokenPreProcessor.inputType = inputType;

				return new CachedTokenPipeline(
						this.cachePipeline.bind( this, 'text/wiki', pipelinePart ),
						wikiTokenizer,
						tokenExpander,
						isInclude
						);
			}
			break;

		default:
			console.trace();
			throw "OldParserPipeline.makeInputPipeline: Unsupported input type " + inputType;
	}
};
|
2012-01-09 17:49:16 +00:00
|
|
|
|
2012-01-09 19:33:49 +00:00
|
|
|
|
2012-01-11 00:05:51 +00:00
|
|
|
|
2012-01-09 19:33:49 +00:00
|
|
|
/**
 * Factory for attribute transformations, with input type implicit in the
 * environment.
 *
 * NOTE(review): As with makeInputPipeline, `CachedTokenPipeline` is not
 * defined in this file, so this legacy path cannot run as-is.
 */
OldParserPipeline.prototype.makeAttributePipeline = function ( inputType, args, isInclude ) {
	// Include and top-level attribute pipelines are cached separately.
	var pipelinePart = isInclude ? 'attribute-include' : 'attribute-toplevel';
	//console.warn( 'makeAttributePipeline: ' + pipelinePart);
	if ( this.pipelineCache[inputType][pipelinePart].length ) {
		// Reuse a cached pipeline; only the template arguments differ.
		var pipe = this.pipelineCache[inputType][pipelinePart].pop();
		pipe.last.args = args;
		//console.warn( 'from cache' + JSON.stringify( pipe.last.transformers, null, 2 ) );
		return pipe;
	} else {
		/**
		 * Token stream transformations.
		 * This is where all the wiki-specific functionality is implemented.
		 * See https://www.mediawiki.org/wiki/Future/Parser_development/Token_stream_transformations
		 */
		var tokenPreProcessor = new TokenTransformManager
					.SyncTokenTransformManager ( this.env, inputType, 1, isInclude );

		this._addTransformers( inputType, 'sync01', tokenPreProcessor, isInclude );

		// Attributes never expand noinclude content.
		new NoInclude( tokenPreProcessor );

		var tokenExpander = new TokenTransformManager.AsyncTokenTransformManager (
				{
					'input': this.makeInputPipeline.bind( this ),
					'attributes': this.makeAttributePipeline.bind( this )
				},
				args, this.env, inputType, 2, isInclude
				);
		// Add token transformers
		// NOTE(review): 'text/wiki' is hard-coded here while the surrounding
		// code uses the `inputType` parameter — possibly intentional (async12
		// transforms only exist for wikitext), possibly a bug; confirm.
		this._addTransformers( 'text/wiki', 'async12',
				tokenExpander, isInclude );

		tokenExpander.listenForTokensFrom ( tokenPreProcessor );

		//console.warn( 'new pipe' + JSON.stringify( tokenExpander.transformers, null, 2 ) );
		return new CachedTokenPipeline(
				this.cachePipeline.bind( this, inputType, pipelinePart ),
				tokenPreProcessor,
				tokenExpander,
				isInclude
				);
	}
};
|
|
|
|
|
2012-04-25 14:35:59 +00:00
|
|
|
/**
 * Return a used sub-pipeline to its cache slot, keeping at most five idle
 * pipelines per (input type, pipeline part) combination.
 *
 * @param {String} inputType Input type key of the cache, e.g. 'text/wiki'.
 * @param {String} pipelinePart Slot name, e.g. 'input-include'.
 * @param {Object} pipe The pipeline being recycled.
 */
OldParserPipeline.prototype.cachePipeline = function ( inputType, pipelinePart, pipe ) {
	var slot = this.pipelineCache[inputType][pipelinePart];
	// Only cache when the slot exists and is below its size bound.
	if ( slot && slot.length < 5 ) {
		slot.push( pipe );
	}
};
|
|
|
|
|
|
|
|
|
2012-01-11 00:05:51 +00:00
|
|
|
|
2012-01-10 01:09:50 +00:00
|
|
|
/**
 * Feed the parser pipeline with some input, the output is emitted in events.
 *
 * @method
 * @param {Mixed} All arguments are passed through to the underlying input
 * pipeline's first element's process() method. For a wikitext pipeline (the
 * default), this would be the wikitext to tokenize:
 * pipeline.parse ( wikiText );
 */
OldParserPipeline.prototype.parse = function ( ) {
	// Set the pipeline in motion by handing all arguments, unchanged, to the
	// first stage of the input pipeline.
	var firstStage = this.inputPipeline;
	firstStage.process.apply( firstStage, arguments );
};
|
2011-12-28 01:37:06 +00:00
|
|
|
|
2012-01-18 23:46:01 +00:00
|
|
|
// Just bubble up the document event from the pipeline
OldParserPipeline.prototype.forwardDocument = function ( doc ) {
	// Re-emit on this pipeline so external listeners need not know about
	// the internal post-processor stage.
	this.emit( 'document', doc );
};
|
2012-01-03 18:44:31 +00:00
|
|
|
|
2011-12-28 01:37:06 +00:00
|
|
|
|
2012-01-04 11:00:54 +00:00
|
|
|
// XXX: remove JSON serialization here, that should only be performed when
// needed (and normally without pretty-printing).
OldParserPipeline.prototype.getWikiDom = function ( document ) {
	// Convert the HTML body to WikiDom, then pretty-print it as JSON.
	var wikiDom = this.DOMConverter.HTMLtoWiki( document.body );
	return JSON.stringify( wikiDom, null, 2 );
};
|
|
|
|
|
2012-04-25 14:35:59 +00:00
|
|
|
// Convert the document body to the linear model, pretty-printed as JSON.
OldParserPipeline.prototype.getLinearModel = function ( document ) {
	var linearModel = ConvertDOMToLM( document.body );
	return JSON.stringify( linearModel, null, 2 );
};
|
|
|
|
|
2012-01-03 18:44:31 +00:00
|
|
|
|
2012-01-11 00:05:51 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
2011-12-28 01:37:06 +00:00
|
|
|
// Export the public classes when running under CommonJS (e.g. node).
// Use strict equality; loose == adds nothing over === with typeof.
if (typeof module === "object") {
	module.exports.ParserPipeline = ParserPipeline;
	module.exports.ParserPipelineFactory = ParserPipelineFactory;
}
|