/**
 * Token transformation managers with a (mostly) abstract
 * TokenTransformManager base class and AsyncTokenTransformManager and
 * SyncTokenTransformManager implementation subclasses. Individual
 * transformations register for the token types they are interested in and are
 * called on each matching token.
 *
 * Async token transformations are supported by the TokenAccumulator class,
 * that manages as-early-as-possible and in-order return of tokens including
 * buffering.
 *
 * See
 * https://www.mediawiki.org/wiki/Future/Parser_development/Token_stream_transformations
 * for more documentation.
 *
 * @author Gabriel Wicke
 */

var events = require('events');

/**
 * Base class for token transform managers.
 *
 * @class
 * @constructor
 */
function TokenTransformManager( ) {
	// Separate the constructor, so that we can call it from subclasses.
	this._construct();
}

// Inherit from EventEmitter. The prototype is a bare object (not an
// EventEmitter instance) so that no emitter state lives on the prototype.
TokenTransformManager.prototype = Object.create( events.EventEmitter.prototype );
TokenTransformManager.prototype.constructor = TokenTransformManager;

/**
 * Shared constructor body: initializes the event emitter part of the
 * instance and creates an empty set of transform registrations.
 *
 * @method
 */
TokenTransformManager.prototype._construct = function () {
	// Run the EventEmitter constructor on this instance so that listener
	// state is owned by the instance instead of being picked up through the
	// prototype chain.
	events.EventEmitter.call( this );

	this.transformers = {
		// for TAG, ENDTAG, SELFCLOSINGTAG, keyed on lower-cased name. Created
		// without a prototype so that tag names such as 'constructor' or
		// 'toString' do not resolve to Object.prototype members.
		tag: Object.create( null ),
		text: [],
		newline: [],
		comment: [],
		end: [], // eof
		martian: [], // none of the above (unknown token type)
		any: [] // all tokens, before more specific handlers are run
	};
};

/**
 * Register to a token source, normally the tokenizer.
 * The event emitter emits a 'chunk' event with a chunk of tokens,
 * and signals the end of tokens by triggering the 'end' event.
 * XXX: Perform registration directly in the constructor?
 *
 * @method
 * @param {Object} tokenEmitter EventEmitter emitting 'chunk' and 'end'.
 */
TokenTransformManager.prototype.listenForTokensFrom = function ( tokenEmitter ) {
	tokenEmitter.addListener( 'chunk', this.onChunk.bind( this ) );
	tokenEmitter.addListener( 'end', this.onEndEvent.bind( this ) );
};

/**
 * Map a rank to a phase.
 *
 * XXX: Might not be needed anymore, as phases are now subclassed and
 * registrations are separated.
 *
 * @method
 * @param {Number} rank
 * @returns {Number} 2 for ranks up to and including 2, 3 otherwise.
 * @throws {String} if rank is below 0 or above 3.
 */
TokenTransformManager.prototype._rankToPhase = function ( rank ) {
	if ( rank < 0 || rank > 3 ) {
		throw "TransformManager error: Invalid transformation rank " + rank;
	}
	return rank <= 2 ? 2 : 3;
};

/**
 * Add a transform registration.
 *
 * @method
 * @param {Function} transformation, called as
 * transformation( token, cb, manager, prevToken ) and expected to return an
 * object with a 'token', 'tokens' or 'async' member.
 * @param {Number} rank, [0,3) with [0,1) in-order on input token stream,
 * [1,2) out-of-order and [2,3) in-order on output token stream
 * @param {String} type, one of 'tag', 'text', 'newline', 'comment', 'end',
 * 'martian' (unknown token), 'any' (any token, matched before other matches).
 * @param {String} name, tag name for tags, omitted for non-tags
 */
TokenTransformManager.prototype.addTransform = function ( transformation, rank, type, name ) {
	var transArr,
		transformer = {
			transform: transformation,
			rank: rank
		};
	if ( type === 'tag' ) {
		name = name.toLowerCase();
		transArr = this.transformers.tag[name];
		if ( ! transArr ) {
			transArr = this.transformers.tag[name] = [];
		}
	} else {
		transArr = this.transformers[type];
	}
	transArr.push( transformer );
	// sort ascending by rank
	transArr.sort( this._cmpTransformations );
};

/**
 * Remove all transform registrations of a given rank.
 *
 * @method
 * @param {Number} rank, the rank the transform was registered with.
 * @param {String} type, one of 'tag', 'text', 'newline', 'comment', 'end',
 * 'martian' (unknown token), 'any' (any token, matched before other matches).
 * @param {String} name, tag name for tags, omitted for non-tags
 */
TokenTransformManager.prototype.removeTransform = function ( rank, type, name ) {
	var registered;

	function rankUnEqual ( transformer ) {
		return transformer.rank !== rank;
	}

	if ( type === 'tag' ) {
		// Registrations are keyed on the lower-cased tag name, so look the
		// list up by the value of name rather than a literal 'name' key.
		name = name.toLowerCase();
		registered = this.transformers.tag[name];
		if ( registered ) {
			this.transformers.tag[name] = registered.filter( rankUnEqual );
		}
	} else {
		this.transformers[type] = this.transformers[type].filter( rankUnEqual );
	}
};

/**
 * Enforce separation between phases when token types or tag names have
 * changed, or when multiple tokens were returned. Processing will restart
 * with the new rank.
 *
 * XXX: This should also be moved to the subclass (actually partially implicit if
 * _transformTagToken and _transformToken are subclassed and set the rank when
 * fully processed). The token type change case still needs to be covered
 * though.
 *
 * @method
 * @param {Object} res, the return value of a transform.
 * @param {Object} transformer, the registration that produced res.
 */
TokenTransformManager.prototype._resetTokenRank = function ( res, transformer ) {
	var i;
	if ( res.token ) {
		// reset rank after type or name change
		if ( transformer.rank < 1 ) {
			res.token.rank = 0;
		} else {
			res.token.rank = 1;
		}
	} else if ( res.tokens && transformer.rank > 2 ) {
		for ( i = 0; i < res.tokens.length; i++ ) {
			if ( res.tokens[i].rank === undefined ) {
				// Do not run phase 0 on newly created tokens from
				// phase 1.
				res.tokens[i].rank = 2;
			}
		}
	}
};

/**
 * Comparison for sorting transformations by ascending rank.
 */
TokenTransformManager.prototype._cmpTransformations = function ( a, b ) {
	return a.rank - b.rank;
};

/**
 * Run a rank-sorted list of transformers over a single token. Shared by
 * _transformTagToken and _transformToken.
 *
 * Transformers whose rank is not above the token's current (truthy) rank
 * are skipped. Processing stops early when a transformer returns no single
 * token, or a token whose type (and, for tags, name) differs from the input;
 * in that case _resetTokenRank is applied and the result is returned as-is
 * for the caller to re-process.
 *
 * @method
 * @param {Object} token The current token.
 * @param {Function} cb Completion callback for async processing.
 * @param {Number} phaseEndRank Rank assigned to a fully processed token.
 * @param {Array} ts Rank-sorted transformer registrations.
 * @param {Boolean} compareName Whether a changed name also aborts.
 * @returns {Object} Token(s) and async indication.
 */
TokenTransformManager.prototype._runTransformers = function ( token, cb, phaseEndRank, ts, compareName ) {
	var res = { token: token },
		aborted = false,
		transformer,
		i,
		l;

	for ( i = 0, l = ts.length; i < l; i++ ) {
		transformer = ts[i];
		if ( res.token.rank && transformer.rank <= res.token.rank ) {
			// skip transformation, was already applied.
			continue;
		}
		// Transform token with side effects.
		// XXX: consider moving the rank out of the token itself to avoid
		// transformations messing with it in broken ways. Not sure if
		// some transformations need to manipulate it though. gwicke
		res = transformer.transform( res.token, cb, this, this.prevToken );
		// if multiple tokens or null token: process returned tokens (in
		// parent). async implies tokens instead of token, so no need to
		// check explicitly.
		if ( !res.token ||
				res.token.type !== token.type ||
				( compareName && res.token.name !== token.name ) ) {
			this._resetTokenRank( res, transformer );
			aborted = true;
			break;
		}
		// track progress on token
		res.token.rank = transformer.rank;
	}

	if ( ! aborted ) {
		// Mark token as fully processed.
		res.token.rank = phaseEndRank;
	}
	return res;
};

/**
 * Call all transformers on a tag.
 * XXX: Move to subclasses and use a different signature?
 *
 * @method
 * @param {Object} token The current token.
 * @param {Function} cb Completion callback for async processing.
 * @param {Number} phaseEndRank Rank of phase end, both key for transforms and
 * rank for processed tokens.
 * @returns {Object} Token(s) and async indication.
 */
TokenTransformManager.prototype._transformTagToken = function ( token, cb, phaseEndRank ) {
	// 'any' transformers are merged with the tag-specific ones
	var ts = this.transformers.any,
		tagts = this.transformers.tag[token.name.toLowerCase()];

	if ( tagts && tagts.length ) {
		// could cache this per tag type to avoid re-sorting each time
		ts = ts.concat( tagts );
		ts.sort( this._cmpTransformations );
	}
	return this._runTransformers( token, cb, phaseEndRank, ts, true );
};

/**
 * Call all transformers on non-tag token types.
 * XXX: different signature for sync vs. async, move to subclass?
 *
 * @method
 * @param {Object} token The current token.
 * @param {Function} cb Completion callback for async processing.
 * @param {Number} phaseEndRank Rank of phase end, both key for transforms and
 * rank for processed tokens.
 * @param {Array} ts List of token transformers for this token type.
 * @returns {Object} Token(s) and async indication.
 */
TokenTransformManager.prototype._transformToken = function ( token, cb, phaseEndRank, ts ) {
	var anyTrans = this.transformers.any,
		// A missing list is treated as "no type-specific transformers".
		all = ts || [];

	if ( anyTrans.length ) {
		// merge in the 'any' transformers
		all = anyTrans.concat( all );
		all.sort( this._cmpTransformations );
	}
	return this._runTransformers( token, cb, phaseEndRank, all, false );
};

/**
 * Pick the transformer list matching the token's type and run it.
 * Shared by the async and sync managers.
 *
 * @method
 * @param {Object} token The current token.
 * @param {Function} cb Completion callback for async processing.
 * @param {Number} phaseEndRank Rank assigned to fully processed tokens.
 * @returns {Object} Token(s) and async indication.
 */
TokenTransformManager.prototype._transformByType = function ( token, cb, phaseEndRank ) {
	var ts = this.transformers;
	switch ( token.type ) {
		case 'TAG':
		case 'ENDTAG':
		case 'SELFCLOSINGTAG':
			return this._transformTagToken( token, cb, phaseEndRank );
		case 'TEXT':
			return this._transformToken( token, cb, phaseEndRank, ts.text );
		case 'COMMENT':
			return this._transformToken( token, cb, phaseEndRank, ts.comment );
		case 'NEWLINE':
			return this._transformToken( token, cb, phaseEndRank, ts.newline );
		case 'END':
			return this._transformToken( token, cb, phaseEndRank, ts.end );
		default:
			return this._transformToken( token, cb, phaseEndRank, ts.martian );
	}
};

/******************** Async token transforms: Phase 2 **********************/

/**
 * Asynchronous and potentially out-of-order token transformations, used in phase 2.
 *
 * return protocol for individual transforms:
 *		{ tokens: [tokens], async: true }: async expansion -> outstanding++ in parent
 *		{ tokens: [tokens] }: fully expanded, tokens will be reprocessed
 *		{ token: token }: single-token return
 *
 * @class
 * @constructor
 * @param {Function} childFactory: A function that can be used to create a
 * new, nested transform manager:
 * nestedAsyncTokenTransformManager = manager.newChildPipeline( inputType, args );
 * @param {Object} args, the argument map for templates
 * @param {Object} env, the environment.
 */
function AsyncTokenTransformManager ( childFactory, args, env ) {
	// Factory function for new AsyncTokenTransformManager creation with
	// default transforms enabled
	// Also sets up a tokenizer and phase-1-transform depending on the input format
	// nestedAsyncTokenTransformManager = manager.newChildPipeline( inputType, args );
	this.childFactory = childFactory;
	this._construct();
	this._reset( args, env );
}

// Inherit from TokenTransformManager, and thus also from EventEmitter.
AsyncTokenTransformManager.prototype = Object.create( TokenTransformManager.prototype );
AsyncTokenTransformManager.prototype.constructor = AsyncTokenTransformManager;

/**
 * Create a new child pipeline.
 *
 * @method
 * @param {String} inputType Input type, currently only support 'text/wiki'.
 * @param {Object} args Template arguments
 * @returns {Object} Pipeline, which is an object with 'first' pointing to the
 * first stage of the pipeline, and 'last' pointing to the last stage.
 */
AsyncTokenTransformManager.prototype.newChildPipeline = function ( inputType, args ) {
	return this.childFactory( inputType, args );
};

/**
 * Reset the internal token and outstanding-callback state of the
 * TokenTransformManager, but keep registrations untouched.
 *
 * @method
 * @param {Object} args, template arguments
 * @param {Object} env, the environment. May be omitted once an environment
 * has been set by an earlier call.
 * @throws {String} if no environment is passed and none was set before.
 */
AsyncTokenTransformManager.prototype._reset = function ( args, env ) {
	// Note: Much of this is frame-like.
	this.tailAccumulator = undefined;
	// eventize: bend to event emitter callback
	this.tokenCB = this._returnTokens.bind( this );
	this.accum = new TokenAccumulator( null, this );
	this.firstaccum = this.accum;
	this.prevToken = undefined;
	// no arguments at the top level
	this.args = args ? args : {};
	if ( env ) {
		this.env = env;
	} else if ( ! this.env ) {
		throw "AsyncTokenTransformManager: environment needed!" + env;
	}
};

/**
 * Transform and expand tokens. Transformed token chunks will be emitted in
 * the 'chunk' event.
 *
 * @method
 * @param {Array} chunk of tokens
 */
AsyncTokenTransformManager.prototype.onChunk = function ( tokens ) {
	// Set top-level callback to next transform phase
	var res = this.transformTokens( tokens, this.tokenCB );

	if ( this.tailAccumulator ) {
		// An earlier chunk started async work. this.tokenCB is then the
		// 'sibling' callback of that tail accumulator, so hand the finished
		// tokens to it instead of emitting them ahead of the pending ones.
		this.tokenCB( res.tokens, true );
	} else {
		// Emit only the tokens that completed processing, not the input.
		this.emit( 'chunk', res.tokens );
	}

	if ( res.async ) {
		// Later chunks (and the end event) attach behind the new tail. The
		// tail is only replaced when new async work was started.
		this.tailAccumulator = res.async;
		this.tokenCB = res.async.getParentCB( 'sibling' );
	}
};

/**
 * Run transformations from phases 0 and 1. This includes starting and
 * managing asynchronous transformations.
 *
 * Note that the passed-in token array is modified in place while returned
 * tokens are spliced in for re-processing.
 *
 * NOTE(review): a transform result carrying both 'tokens' and 'async' is
 * handled as a plain 'tokens' result here; confirm whether the async part of
 * the return protocol needs separate handling.
 *
 * @method
 * @param {Array} tokens chunk of tokens
 * @param {Function} parentCB callback( tokens, notYetDone ) receiving tokens
 * that become available after this call returns.
 * @returns {Object} { tokens: finished tokens preceding any async
 * transformation, async: last TokenAccumulator in the chain or null }
 */
AsyncTokenTransformManager.prototype.transformTokens = function ( tokens, parentCB ) {
	var res,
		i,
		phaseEndRank = 2, // parametrize!
		// Tokens finished before any async transformation was started
		localAccum = [],
		// Prepare a new accumulator, to be used by async children (if any)
		accum = new TokenAccumulator( parentCB, this ),
		cb = accum.getParentCB( 'child' ),
		activeAccum = null;

	for ( i = 0; i < tokens.length; i++ ) {
		res = this._transformByType( tokens[i], cb, phaseEndRank );

		if ( res.tokens ) {
			// Splice in the returned tokens (while replacing the original
			// token), and process them next.
			[].splice.apply( tokens, [i, 1].concat( res.tokens ) );
			i--; // continue at first inserted token
		} else if ( res.token ) {
			if ( res.token.rank === phaseEndRank ) {
				// token is done.
				if ( activeAccum ) {
					// push to accumulator
					activeAccum.push( res.token );
				} else {
					// If there is no accumulator yet, then directly return the
					// token to the parent. Collect them in localAccum for this
					// purpose.
					localAccum.push( res.token );
				}
			} else {
				// re-process token.
				tokens[i] = res.token;
				i--;
			}
		} else if ( res.async ) {
			// The child now switched to activeAccum, we have to create a new
			// accumulator for the next potential child.
			activeAccum = accum;
			accum = new TokenAccumulator( activeAccum.getParentCB( 'sibling' ), this );
			cb = accum.getParentCB( 'child' );
		}
	}

	// Return finished tokens directly to caller, and indicate if further
	// async actions are outstanding. The caller needs to point a sibling to
	// the returned accumulator, or call .siblingDone() to mark the end of a
	// chain.
	return { tokens: localAccum, async: activeAccum };
};

/**
 * Callback from tokens fully processed for phase 0 and 1, which are now ready
 * for synchronous and globally in-order phase 2 processing.
 *
 * @method
 * @param {Array} chunk of tokens
 * @param {Mixed} Either a falsy value if this is the last callback
 * (everything is done), or a truish value if not yet done.
 */
AsyncTokenTransformManager.prototype._returnTokens = function ( tokens, notYetDone ) {
	// FIXME: store frame in object?
	this.emit( 'chunk', tokens );

	if ( ! notYetDone ) {
		// signal our done-ness to consumers.
		this.emit( 'end' );
		// and reset internal state.
		this._reset();
	}
};

/**
 * Callback for the end event emitted from the tokenizer.
 * Either signals the end of input to the tail of an ongoing asynchronous
 * processing pipeline, or directly emits 'end' if the processing was fully
 * synchronous.
 */
AsyncTokenTransformManager.prototype.onEndEvent = function () {
	if ( this.tailAccumulator ) {
		this.tailAccumulator.siblingDone();
	} else {
		// nothing was asynchronous, so we'll have to emit end here.
		this.emit( 'end' );
		this._reset();
	}
};


/*************** In-order, synchronous transformer (phase 1 and 3) ***************/

/**
 * Subclass for phase 3, in-order and synchronous processing.
 *
 * @class
 * @constructor
 * @param {Object} environment.
 */
function SyncTokenTransformManager ( env ) {
	// both inherited
	this._construct();
	this.args = {}; // no arguments at the top level
	this.env = env;
}

// Inherit from TokenTransformManager, and thus also from EventEmitter.
SyncTokenTransformManager.prototype = Object.create( TokenTransformManager.prototype );
SyncTokenTransformManager.prototype.constructor = SyncTokenTransformManager;

/**
 * Global in-order and synchronous traversal on token stream. Emits
 * transformed chunks of tokens in the 'chunk' event.
 *
 * The passed-in token array is modified in place while returned tokens are
 * spliced in for re-processing.
 *
 * @method
 * @param {Array} Token chunk.
 */
SyncTokenTransformManager.prototype.onChunk = function ( tokens ) {
	var res,
		i,
		phaseEndRank = 3,
		localAccum = [],
		// XXX: a callback is not meaningful for purely synchronous processing!
		cb;

	for ( i = 0; i < tokens.length; i++ ) {
		res = this._transformByType( tokens[i], cb, phaseEndRank );

		if ( res.tokens ) {
			// Splice in the returned tokens (while replacing the original
			// token), and process them next.
			[].splice.apply( tokens, [i, 1].concat( res.tokens ) );
			i--; // continue at first inserted token
		} else if ( res.token ) {
			if ( res.token.rank === phaseEndRank ) {
				// token is done.
				localAccum.push( res.token );
				// Remembered so that it can be handed to the transforms of
				// the following token.
				this.prevToken = res.token;
			} else {
				// re-process token.
				tokens[i] = res.token;
				i--;
			}
		}
	}
	this.emit( 'chunk', localAccum );
};

/**
 * Callback for the end event emitted from the tokenizer.
 * This phase is fully synchronous, so the end is simply passed along.
 */
SyncTokenTransformManager.prototype.onEndEvent = function () {
	this.emit( 'end' );
};


/******************************* TokenAccumulator *************************/

/**
 * Token accumulators buffer tokens between asynchronous processing points,
 * and return fully processed token chunks in-order and as soon as possible.
 * They support the AsyncTokenTransformManager.
 *
 * Each accumulator waits for two parties: a 'child' (the asynchronous
 * transformation it was handed to) and a 'sibling' (whatever follows it in
 * the token stream).
 *
 * @class
 * @constructor
 * @param {Function} parentCB callback( tokens, notYetDone ) that receives
 * the tokens released by this accumulator.
 * @param {Object} manager (optional) object providing
 * transformTokens( tokens, parentCB ), used to re-process tokens returned by
 * the child. Without it, child tokens are passed on unprocessed.
 */
function TokenAccumulator ( parentCB, manager ) {
	this.parentCB = parentCB;
	this.manager = manager;
	this.accum = [];
	// Wait for child and sibling by default
	// Note: Need to decrement outstanding on last accum
	// in a chain.
	this.outstanding = 2;
}

/**
 * Curry a parentCB with the object and reference.
 *
 * @method
 * @param {misc} reference Reference / key for callback, 'child' or 'sibling'
 * @returns {Function} callback( tokens, notYetDone )
 */
TokenAccumulator.prototype.getParentCB = function ( reference ) {
	return this._returnTokens.bind( this, reference );
};

/**
 * Pass tokens to an accumulator.
 *
 * @method
 * @param {misc} reference 'child' if called by the asynchronous
 * transformation, anything else is treated as the sibling.
 * @param {Array} tokens
 * @param {Mixed} notYetDone falsy if this is the last call from this party.
 * @returns {Mixed} the parent callback's return value when sibling tokens
 * are passed straight through, null otherwise.
 */
TokenAccumulator.prototype._returnTokens = function ( reference, tokens, notYetDone ) {
	var res,
		cb,
		returnTokens;

	if ( ! notYetDone ) {
		this.outstanding--;
	}

	if ( reference === 'child' ) {
		// XXX: Use some marker to avoid re-transforming token chunks several
		// times?
		res = this.manager ?
			this.manager.transformTokens( tokens, this.parentCB ) :
			{ tokens: tokens, async: null };

		// Tokens from this call go to the callback that was current before
		// any re-pointing below.
		cb = this.parentCB;
		returnTokens = res.tokens;

		if ( res.async ) {
			// new asynchronous expansion started, chain of accumulators
			// created
			if ( ! notYetDone ) {
				// Tokens buffered behind the child have to stay behind the
				// newly started expansion as well, so move them to the end
				// of the new chain.
				res.async.accum = res.async.accum.concat( this.accum );
				this.accum = [];
			}
			if ( this.outstanding === 0 ) {
				// Last accum in chain should only wait for child
				res.async.outstanding--;
			} else {
				// set own callback to new sibling, the end of accumulator chain
				this.parentCB = res.async.getParentCB( 'sibling' );
			}
		} else if ( ! notYetDone ) {
			// Child is done, return accumulator from sibling. Siblings
			// process tokens themselves, so we concat those to the result of
			// processing tokens from the child.
			returnTokens = res.tokens.concat( this.accum );
			this.accum = [];
		}

		// The parent is only told that everything is done once neither new
		// async work nor the child or sibling of this accumulator is pending.
		cb( returnTokens, !!res.async || this.outstanding > 0 );
		return null;
	}

	// sibling
	if ( this.outstanding === 0 ) {
		// A sibling will transform tokens, so we don't have to do this
		// again.
		returnTokens = this.accum.concat( tokens );
		this.accum = [];
		this.parentCB( returnTokens, false );
		return null;
	}
	if ( this.outstanding === 1 && notYetDone ) {
		// Sibling is not yet done, but child is. Return own parentCB to
		// allow the sibling to go direct, and call back parent with
		// tokens. The internal accumulator is empty at this stage, as its
		// tokens are passed to the parent when the child is done.
		return this.parentCB( tokens, true );
	}
	// The child is still outstanding: keep the sibling's tokens until the
	// child has returned.
	this.accum = this.accum.concat( tokens );
	return null;
};

/**
 * Mark the sibling as done (normally at the tail of a chain).
 */
TokenAccumulator.prototype.siblingDone = function () {
	this._returnTokens( 'sibling', [], false );
};

/**
 * Push a token into the accumulator
 *
 * @method
 * @param {Object} token
 * @returns {Number} new length of the internal buffer
 */
TokenAccumulator.prototype.push = function ( token ) {
	return this.accum.push( token );
};

if ( typeof module === "object" ) {
	module.exports.AsyncTokenTransformManager = AsyncTokenTransformManager;
	module.exports.SyncTokenTransformManager = SyncTokenTransformManager;
}