/**
 * This module assembles parser pipelines from parser stages with
 * asynchronous communication between stages based on events. Apart from the
 * default pipeline which converts WikiText to HTML DOM, it also provides
 * sub-pipelines for the processing of template transclusions.
 *
 * See http://www.mediawiki.org/wiki/Parsoid and
 * http://www.mediawiki.org/wiki/Parsoid/Token_stream_transformations
 * for illustrations of the pipeline architecture.
 *
 * @author Gabriel Wicke
 * @author Neil Kandalgaonkar
 */

// make this global for now
// XXX: figure out a way to get away without a global for PEG actions!
$ = require('jquery');

var events = require( 'events' );

var fs = require('fs'),
	path = require('path'),
	PegTokenizer = require('./mediawiki.tokenizer.peg.js').PegTokenizer,
	TokenTransformManager = require('./mediawiki.TokenTransformManager.js'),
	SyncTokenTransformManager = TokenTransformManager.SyncTokenTransformManager,
	AsyncTokenTransformManager = TokenTransformManager.AsyncTokenTransformManager,
	NoIncludeOnly = require('./ext.core.NoIncludeOnly.js'),
	IncludeOnly = NoIncludeOnly.IncludeOnly,
	NoInclude = NoIncludeOnly.NoInclude,
	OnlyInclude = NoIncludeOnly.OnlyInclude,
	QuoteTransformer = require('./ext.core.QuoteTransformer.js').QuoteTransformer,
	PostExpandParagraphHandler = require('./ext.core.PostExpandParagraphHandler.js')
		.PostExpandParagraphHandler,
	Sanitizer = require('./ext.core.Sanitizer.js').Sanitizer,
	TemplateHandler = require('./ext.core.TemplateHandler.js').TemplateHandler,
	AttributeExpander = require('./ext.core.AttributeExpander.js').AttributeExpander,
	LinkHandler = require('./ext.core.LinkHandler.js'),
	WikiLinkHandler = LinkHandler.WikiLinkHandler,
	ExternalLinkHandler = LinkHandler.ExternalLinkHandler,
	Cite = require('./ext.Cite.js').Cite,
	BehaviorSwitchHandler = require('./ext.core.BehaviorSwitchHandler.js').BehaviorSwitchHandler,
	TreeBuilder = require('./mediawiki.HTML5TreeBuilder.node.js')
		.FauxHTML5.TreeBuilder,
	DOMPostProcessor = require('./mediawiki.DOMPostProcessor.js').DOMPostProcessor;

/**
 * Factory that builds (and caches) parser pipelines from the recipes below.
 *
 * @param {Object} env - shared parser environment, passed to every stage.
 */
function ParserPipelineFactory ( env ) {
	// Per-type cache of idle pipelines, keyed by type (plus an optional
	// '::noInclude' suffix -- see getPipeline).
	this.pipelineCache = {};
	this.env = env;
}

/**
 * Recipe for parser pipelines and -subpipelines, depending on input types.
 *
 * Token stream transformations to register by type and per phase. The
 * possible ranks for individual transformation registrations are [0,1)
 * (excluding 1.0) for sync01, [1,2) for async12 and [2,3) for sync23.
 *
 * Each recipe is a list of stages; a stage is either a string naming
 * another recipe (a sub-pipeline, built recursively) or a
 * [ Constructor, constructorArgs, optionalTransforms ] triple.
 *
 * Should perhaps be moved to mediawiki.parser.environment.js, so that all
 * configuration can be found in a single place.
 */
ParserPipelineFactory.prototype.recipes = {
	// The full wikitext pipeline
	'text/x-mediawiki/full': [
		// Input pipeline including the tokenizer
		'text/x-mediawiki',
		// Final synchronous token transforms and DOM building / processing
		'tokens/x-mediawiki/expanded'
	],

	// A pipeline from wikitext to expanded tokens. The input pipeline for
	// wikitext.
	'text/x-mediawiki': [
		[ PegTokenizer, [] ],
		'tokens/x-mediawiki'
	],

	// Synchronous per-input and async token stream transformations. Produces
	// a fully expanded token stream ready for consumption by the
	// tokens/expanded pipeline.
	'tokens/x-mediawiki': [
		// Synchronous in-order per input
		[
			SyncTokenTransformManager,
			[ 1, 'tokens/x-mediawiki' ],
			[
				OnlyInclude,
				IncludeOnly,
				NoInclude,
				BehaviorSwitchHandler
				// Insert TokenCollectors for extensions here (don't expand
				// templates in extension contents); wrap collected tokens in
				// special extension token.
				/* Extension1, */
				/* Extension2, */
			]
		],
		/*
		 * Asynchronous out-of-order per input. Each async transform can only
		 * operate on a single input token, but can emit multiple output
		 * tokens. If multiple tokens need to be collected per-input, then a
		 * separate collection transform in sync01 can be used to wrap the
		 * collected tokens into a single one later processed in an async12
		 * transform.
		 */
		[
			AsyncTokenTransformManager,
			[ 2, 'tokens/x-mediawiki' ],
			[
				TemplateHandler,
				// Expand attributes after templates to avoid expanding
				// unused branches
				AttributeExpander,
				WikiLinkHandler,
				ExternalLinkHandler
				/* ExtensionHandler1, */
				/* ExtensionHandler2, */
			]
		]
	],

	// Final stages of main pipeline, operating on fully expanded tokens of
	// potentially mixed origin.
	'tokens/x-mediawiki/expanded': [
		// Synchronous in-order on fully expanded token stream (including
		// expanded templates etc). In order to support mixed input (from
		// wikitext and plain HTML, say) all applicable transforms need to be
		// included here. Input-specific token types avoid any runtime
		// overhead for unused transforms.
		[
			SyncTokenTransformManager,
			[ 3, 'tokens/x-mediawiki/expanded' ],
			[
				// text/wiki-specific tokens
				QuoteTransformer,
				/* ListHandler, */
				Cite,
				PostExpandParagraphHandler,
				Sanitizer
			]
		],

		// Build a tree out of the fully processed token stream
		[ TreeBuilder, [] ],

		/**
		 * Final processing on the HTML DOM.
		 */

		/* Generic DOM transformer.
		 * This currently performs minor tree-dependent clean up like wrapping
		 * plain text in paragraphs. For HTML output, it would also be configured
		 * to perform more aggressive nesting cleanup.
		 */
		[ DOMPostProcessor, [] ]
	]
};

/**
 * Generic pipeline creation from the above recipes.
 *
 * @param {string} type - recipe key in this.recipes.
 * @param {boolean} isInclude - whether the pipeline processes transcluded
 *        (included) content; passed to every stage and sub-pipeline.
 * @param {string} [cacheType] - cache key under which the finished pipeline
 *        should be returned to the cache when it emits 'end'; when absent,
 *        the pipeline is not returned to the cache.
 * @returns {ParserPipeline} wrapper around the first and last stage.
 * @throws {Error} if no recipe is registered for the given type.
 */
ParserPipelineFactory.prototype.makePipeline = function( type, isInclude, cacheType ) {
	var recipe = this.recipes[type];
	if ( ! recipe ) {
		console.trace();
		// Throw a real Error (not a bare string) so callers get a stack trace.
		throw new Error( 'Error while trying to construct pipeline for ' + type );
	}
	var stages = [];
	for ( var i = 0, l = recipe.length; i < l; i++ ) {
		// create the stage
		var stageData = recipe[i],
			stage;

		if ( stageData.constructor === String ) {
			// Points to another subpipeline, get it recursively
			stage = this.makePipeline( stageData, isInclude );
		} else {
			// Instantiate without calling the constructor first, then apply
			// the constructor with the shared (env, isInclude, factory)
			// prefix plus the recipe-supplied arguments.
			stage = Object.create( stageData[0].prototype );
			// call the constructor
			stageData[0].apply( stage, [ this.env, isInclude, this ].concat( stageData[1] ) );
			if ( stageData.length >= 3 ) {
				// Create (and implicitly register) transforms
				var transforms = stageData[2];
				for ( var t = 0; t < transforms.length; t++ ) {
					new transforms[t]( stage, isInclude );
				}
			}
		}

		// connect with previous stage
		if ( i ) {
			stage.addListenersOn( stages[i-1] );
		}
		stages.push( stage );
	}
	//console.warn( 'stages' + stages + JSON.stringify( stages ) );
	return new ParserPipeline(
			stages[0],
			stages[stages.length - 1],
			cacheType ? this.returnPipeline.bind( this, cacheType ) : null
	);
};

/**
 * Get a subpipeline (not the top-level one) of a given type.
 *
 * Subpipelines are cached as they are frequently created.
 *
 * @param {string} type - recipe key.
 * @param {boolean} [isInclude=true] - include-context flag; pipelines for
 *        non-include contexts are cached under a separate key.
 * @returns {ParserPipeline} a cached idle pipeline if available, else a
 *        freshly built one that will return itself to the cache on 'end'.
 */
ParserPipelineFactory.prototype.getPipeline = function ( type, isInclude ) {
	// default to include
	if ( isInclude === undefined ) {
		isInclude = true;
	}
	var pipe,
		cacheType = type;
	if ( ! isInclude ) {
		cacheType += '::noInclude';
	}
	if ( ! this.pipelineCache[cacheType] ) {
		this.pipelineCache[cacheType] = [];
	}
	if ( this.pipelineCache[cacheType].length ) {
		//console.warn( JSON.stringify( this.pipelineCache[cacheType] ));
		return this.pipelineCache[cacheType].pop();
	} else {
		return this.makePipeline( type, isInclude, cacheType );
	}
};

/**
 * Callback called by a pipeline at the end of its processing. Returns the
 * pipeline to the cache.
 *
 * Strips the 'end' and 'chunk' listeners added by the previous consumer so
 * the cached pipeline starts clean; at most 5 idle pipelines are kept per
 * cache key.
 *
 * @param {string} type - cache key the pipeline was created under.
 * @param {ParserPipeline} pipe - the pipeline being returned.
 */
ParserPipelineFactory.prototype.returnPipeline = function ( type, pipe ) {
	pipe.removeAllListeners( 'end' );
	pipe.removeAllListeners( 'chunk' );
	var cache = this.pipelineCache[type];
	if ( cache.length < 5 ) {
		cache.push( pipe );
	}
};

/******************** ParserPipeline ****************************/

/**
 * Wrap some stages into a pipeline. The last member of the pipeline is
 * supposed to emit events, while the first is supposed to support a process()
 * method that sets the pipeline in motion.
 *
 * @param {Object} first - entry stage; must implement process().
 * @param {Object} last - exit stage; must be an EventEmitter.
 * @param {Function} [returnToCacheCB] - called (with this pipeline) when the
 *        last stage emits 'end', so the factory can re-cache the pipeline.
 */
function ParserPipeline ( first, last, returnToCacheCB ) {
	this.first = first;
	this.last = last;
	if ( returnToCacheCB ) {
		var self = this;
		this.returnToCacheCB = function () {
			returnToCacheCB( self );
		};
		// add a callback to return the pipeline back to the cache
		this.last.addListener( 'end', this.returnToCacheCB );
	}
}

/**
 * Feed input tokens to the first pipeline stage.
 */
ParserPipeline.prototype.process = function(input, key) {
	return this.first.process(input, key);
};

/**
 * Set the frame on the last pipeline stage (normally the
 * AsyncTokenTransformManager).
 */
ParserPipeline.prototype.setFrame = function(frame, title, args) {
	return this.last.setFrame(frame, title, args);
};

/**
 * Register the first pipeline stage with the last stage from a separate
 * pipeline.
 */
ParserPipeline.prototype.addListenersOn = function(stage) {
	return this.first.addListenersOn(stage);
};

// Forward the EventEmitter API to this.last
ParserPipeline.prototype.on = function (ev, cb) {
	return this.last.on(ev, cb);
};
ParserPipeline.prototype.once = function (ev, cb) {
	return this.last.once(ev, cb);
};
ParserPipeline.prototype.addListener = function(ev, cb) {
	return this.last.addListener(ev, cb);
};
ParserPipeline.prototype.removeListener = function(ev, cb) {
	return this.last.removeListener(ev, cb);
};
ParserPipeline.prototype.setMaxListeners = function(n) {
	return this.last.setMaxListeners(n);
};
ParserPipeline.prototype.listeners = function(ev) {
	return this.last.listeners(ev);
};

/**
 * Clear listeners on the last stage, but preserve the cache-return callback
 * for 'end' so the pipeline can still find its way back to the cache.
 */
ParserPipeline.prototype.removeAllListeners = function ( event ) {
	if ( event === 'end' ) {
		this.last.removeAllListeners('end');
		// now re-add the cache callback
		if ( this.returnToCacheCB ) {
			this.last.addListener( 'end', this.returnToCacheCB );
		}
	} else {
		return this.last.removeAllListeners( event );
	}
};

if (typeof module === "object") {
	module.exports.ParserPipeline = ParserPipeline;
	module.exports.ParserPipelineFactory = ParserPipelineFactory;
}