/** * Token transformation managers with a (mostly) abstract * TokenTransformManager base class and AsyncTokenTransformManager and * SyncTokenTransformManager implementation subclasses. Individual * transformations register for the token types they are interested in and are * called on each matching token. * * Async token transformations are supported by the TokenAccumulator class, * that manages as-early-as-possible and in-order return of tokens including * buffering. * * See * https://www.mediawiki.org/wiki/Parsoid/Token_stream_transformations * for more documentation. * * @author Gabriel Wicke */ var events = require('events'), LRU = require("lru-cache"), jshashes = require('jshashes'); /** * Base class for token transform managers * * @class * @constructor * @param {Function} callback, a callback function accepting a token list as * its only argument. */ function TokenTransformManager( env, isInclude, pipeFactory, phaseEndRank, attributeType ) { // Separate the constructor, so that we can call it from subclasses. this._construct(); } // Inherit from EventEmitter TokenTransformManager.prototype = new events.EventEmitter(); TokenTransformManager.prototype.constructor = TokenTransformManager; TokenTransformManager.prototype._construct = function () { this.transformers = { tag: {}, // for TagTk, EndTagTk, SelfclosingTagTk, keyed on name text: [], newline: [], comment: [], end: [], // eof martian: [], // none of the above (unknown token type) any: [] // all tokens, before more specific handlers are run }; }; /** * Register to a token source, normally the tokenizer. * The event emitter emits a 'chunk' event with a chunk of tokens, * and signals the end of tokens by triggering the 'end' event. * XXX: Perform registration directly in the constructor? * * @method * @param {Object} EventEmitter token even emitter. */ TokenTransformManager.prototype.addListenersOn = function ( tokenEmitter ) { tokenEmitter.addListener('chunk', this.onChunk.bind( this ) ); tokenEmitter.addListener('end', this.onEndEvent.bind( this ) ); }; /** * Add a transform registration. * * @method * @param {Function} transform. * @param {Number} rank, [0,3) with [0,1) in-order on input token stream, * [1,2) out-of-order and [2,3) in-order on output token stream * @param {String} type, one of 'tag', 'text', 'newline', 'comment', 'end', * 'martian' (unknown token), 'any' (any token, matched before other matches). * @param {String} tag name for tags, omitted for non-tags */ TokenTransformManager.prototype.addTransform = function ( transformation, rank, type, name ) { var transArr, transformer = { transform: transformation, rank: rank }; if ( type === 'tag' ) { name = name.toLowerCase(); transArr = this.transformers.tag[name]; if ( ! transArr ) { transArr = this.transformers.tag[name] = []; } } else { transArr = this.transformers[type]; } transArr.push(transformer); // sort ascending by rank transArr.sort( this._cmpTransformations ); //this.env.dp( 'transforms: ', this.transformers ); }; /** * Remove a transform registration * * @method * @param {Function} transform. * @param {Number} rank, [0,3) with [0,1) in-order on input token stream, * [1,2) out-of-order and [2,3) in-order on output token stream * @param {String} type, one of 'tag', 'text', 'newline', 'comment', 'end', * 'martian' (unknown token), 'any' (any token, matched before other matches). * @param {String} tag name for tags, omitted for non-tags */ TokenTransformManager.prototype.removeTransform = function ( rank, type, name ) { var i = -1, ts; function rankUnEqual ( i ) { return i.rank !== rank; } if ( type === 'tag' ) { name = name.toLowerCase(); var maybeTransArr = this.transformers.tag.name; if ( maybeTransArr ) { this.transformers.tag.name = maybeTransArr.filter( rankUnEqual ); } } else { this.transformers[type] = this.transformers[type].filter( rankUnEqual ) ; } }; TokenTransformManager.prototype.setTokensRank = function ( tokens, rank ) { for ( var i = 0, l = tokens.length; i < l; i++ ) { tokens[i] = this.env.setTokenRank( rank, tokens[i] ); } }; /** * Predicate for sorting transformations by ascending rank. */ TokenTransformManager.prototype._cmpTransformations = function ( a, b ) { return a.rank - b.rank; }; /** * Get all transforms for a given token */ TokenTransformManager.prototype._getTransforms = function ( token, minRank ) { var ts; switch ( token.constructor ) { case String: ts = this.transformers.text; break; case NlTk: ts = this.transformers.newline; break; case CommentTk: ts = this.transformers.comment; break; case EOFTk: ts = this.transformers.end; break; case TagTk: case EndTagTk: case SelfclosingTagTk: ts = this.transformers.tag[token.name.toLowerCase()]; if ( ! ts ) { ts = []; } break; default: ts = this.transformers.martian; break; } // XXX: cache this to avoid constant re-sorting? if ( this.transformers.any.length ) { ts = ts.concat( this.transformers.any ); ts.sort( this._cmpTransformations ); } if ( minRank !== undefined ) { // skip transforms <= minRank var i = 0; for ( l = ts.length; i < l && ts[i].rank <= minRank; i++ ) { } return ( i && ts.slice( i ) ) || ts; } else { return ts; } }; /******************** Async token transforms: Phase 2 **********************/ /** * Asynchronous and potentially out-of-order token transformations, used in phase 2. * * return protocol for individual transforms: * { tokens: [tokens], async: true }: async expansion -> outstanding++ in parent * { tokens: [tokens] }: fully expanded, tokens will be reprocessed * * @class * @constructor */ function AsyncTokenTransformManager ( env, isInclude, pipeFactory, phaseEndRank, attributeType ) { this.env = env; this.isInclude = isInclude; this.pipeFactory = pipeFactory; this.phaseEndRank = phaseEndRank; this.attributeType = attributeType; this.setFrame( null, null, [] ); this._construct(); } // Inherit from TokenTransformManager, and thus also from EventEmitter. AsyncTokenTransformManager.prototype = new TokenTransformManager(); AsyncTokenTransformManager.prototype.constructor = AsyncTokenTransformManager; /** * Reset the internal token and outstanding-callback state of the * TokenTransformManager, but keep registrations untouched. * * @method */ AsyncTokenTransformManager.prototype.setFrame = function ( parentFrame, title, args ) { this.env.dp( 'AsyncTokenTransformManager.setFrame', title, args ); // First piggy-back some reset action this.tailAccumulator = null; // initial top-level callback, emits chunks this.tokenCB = this.emitChunk.bind( this ); // now actually set up the frame if (parentFrame) { if ( title === null ) { // attribute, simply reuse the parent frame this.frame = parentFrame; } else { this.frame = parentFrame.newChild( title, this, args ); } } else { this.frame = new Frame(title, this, args ); } }; /** * Callback for async returns from head of TokenAccumulator chain */ AsyncTokenTransformManager.prototype.emitChunk = function( ret ) { this.env.dp( 'emitChunk', ret ); this.emit( 'chunk', ret.tokens ); if ( ! ret.async ) { this.emit('end'); } else { // allow accumulators to go direct return this.emitChunk.bind( this ); } }; /** * Simple wrapper that processes all tokens passed in */ AsyncTokenTransformManager.prototype.process = function ( tokens ) { if ( ! $.isArray ( tokens ) ) { tokens = [tokens]; } this.onChunk( tokens ); this.onEndEvent(); }; /** * Transform and expand tokens. Transformed token chunks will be emitted in * the 'chunk' event. * * @method * @param {Array} chunk of tokens */ AsyncTokenTransformManager.prototype.onChunk = function ( tokens ) { // Set top-level callback to next transform phase var res = this.transformTokens ( tokens, this.tokenCB ); this.env.dp( 'AsyncTokenTransformManager onChunk', res.async? 'async' : 'sync', res.tokens ); // Emit or append the returned tokens if ( ! this.tailAccumulator ) { this.env.dp( 'emitting' ); this.emit( 'chunk', res.tokens ); } else { this.env.dp( 'appending to tail' ); this.tailAccumulator.append( res.tokens ); } // Update the tail of the current accumulator chain if ( res.async ) { this.tailAccumulator = res.async; this.tokenCB = res.async.getParentCB ( 'sibling' ); } }; /** * Callback for the end event emitted from the tokenizer. * Either signals the end of input to the tail of an ongoing asynchronous * processing pipeline, or directly emits 'end' if the processing was fully * synchronous. */ AsyncTokenTransformManager.prototype.onEndEvent = function () { if ( this.tailAccumulator ) { this.env.dp( 'AsyncTokenTransformManager.onEndEvent: calling siblingDone', this.frame.title ); this.tailAccumulator.siblingDone(); } else { // nothing was asynchronous, so we'll have to emit end here. this.env.dp( 'AsyncTokenTransformManager.onEndEvent: synchronous done', this.frame.title ); this.emit('end'); //this._reset(); } }; /** * Utility method to set up a new TokenAccumulator with the right callbacks. */ AsyncTokenTransformManager.prototype._makeNextAccum = function( cb, state ) { var newAccum = new TokenAccumulator( this, cb ); var _cbs = { parent: newAccum.getParentCB( 'child' ) }; var newCB = this.maybeSyncReturn.bind( this, state, _cbs ); _cbs.self = newCB; return { accum: newAccum, cb: newCB }; }; // Debug counter, provides an UID for transformTokens calls so that callbacks // associated with it can be identified in debugging output as c-XXX. AsyncTokenTransformManager.prototype._counter = 0; /** * Run asynchronous transformations. This is the big workhorse where * templates, images, links and other async expansions (see the transform * recipe mediawiki.parser.js) are processed. * * @param tokens {Array}: Chunk of tokens, potentially with rank and other * meta information associated with it. * @param parentCB {Function}: callback for asynchronous results * @returns {Object}: { tokens: [], async: falsy or the tail TokenAccumulator } * The returned chunk is fully expanded for this phase, and the rank set * to reflect this. */ AsyncTokenTransformManager.prototype.transformTokens = function ( tokens, parentCB ) { //console.warn('AsyncTokenTransformManager.transformTokens: ' + JSON.stringify(tokens) ); var inputRank = tokens.rank || 0, localAccum = [], // a local accum for synchronously returned fully processed tokens activeAccum = localAccum, // start out collecting tokens in localAccum // until the first async transform is hit workStack = [], // stack of stacks (reversed chunks) of tokens returned // from transforms to process before consuming the next // input token token, // currently processed token s = { // Shared state accessible to synchronous transforms in this.maybeSyncReturn transforming: true, res: {}, // debug id for this expansion c: 'c-' + AsyncTokenTransformManager.prototype._counter++ }, minRank; // make localAccum compatible with getParentCB('sibling') localAccum.getParentCB = function() { return parentCB }; var nextAccum = this._makeNextAccum( parentCB, s ); var i = 0, l = tokens.length; while ( i < l || workStack.length ) { if ( workStack.length ) { var curChunk = workStack[workStack.length - 1]; minRank = curChunk.rank || inputRank; token = curChunk.pop(); if ( !curChunk.length ) { // activate nextActiveAccum after consuming the chunk if ( curChunk.nextActiveAccum ) { if ( activeAccum !== curChunk.oldActiveAccum ) { // update the callback of the next active accum curChunk.nextActiveAccum.setParentCB( activeAccum.getParentCB('sibling') ); } activeAccum = curChunk.nextActiveAccum; // create new accum and cb for transforms nextAccum = this._makeNextAccum( activeAccum.getParentCB('sibling'), s ); } // remove empty chunk from workstack workStack.pop(); } } else { token = tokens[i]; i++; minRank = inputRank; if ( token.constructor === Array ) { if ( ! token.length ) { // skip it } else if ( ! token.rank || token.rank < this.phaseEndRank ) { workStack.push( token ); } else { // don't process the array in this phase. activeAccum.push( token ); } continue; } else if ( token.constructor === ParserValue ) { // Parser functions etc that run before full attribute // expansion are responsible for the full expansion of // returned attributes in their respective environments. throw( 'Unexpected ParserValue in AsyncTokenTransformManager.transformTokens:' + JSON.stringify( token ) ); } } var ts = this._getTransforms( token, minRank ); //this.env.dp( 'async token:', s.c, token, minRank, ts ); if ( ! ts.length ) { // nothing to do for this token activeAccum.push( token ); } else { //this.env.tp( 'async trans' ); for (var j = 0, lts = ts.length; j < lts; j++ ) { var transformer = ts[j]; s.res = { }; // Transform the token. transformer.transform( token, this.frame, nextAccum.cb ); var resTokens = s.res.tokens; //this.env.dp( 's.res:', s.c, s.res ); // Check the result, which is changed using the // maybeSyncReturn callback if ( resTokens && resTokens.length ) { if ( resTokens.length === 1 ) { if ( resTokens[0] === undefined ) { console.warn('transformer ' + transformer.rank + ' returned undefined token!'); resTokens.shift(); break; } if ( token === resTokens[0] && ! resTokens.rank ) { // token not modified, continue with // transforms. token = resTokens[0]; continue; } else if ( resTokens.rank === this.phaseEndRank || ( resTokens[0].constructor === String && ! this.transformers.text.length ) ) { // Fast path for text token, and nothing to do for it // Abort processing, but treat token as done. token = resTokens[0]; break; } } // token(s) were potentially modified if ( ! resTokens.rank || resTokens.rank < this.phaseEndRank ) { // There might still be something to do for these // tokens. Prepare them for the workStack. var revTokens = resTokens.slice(); revTokens.reverse(); // Don't apply earlier transforms to results of a // transformer to avoid loops and keep the // execution model sane. revTokens.rank = resTokens.rank || transformer.rank; //revTokens.rank = Math.max( resTokens.rank || 0, transformer.rank ); revTokens.oldActiveAccum = activeAccum; workStack.push( revTokens ); if ( s.res.async ) { revTokens.nextActiveAccum = nextAccum.accum; } // create new accum and cb for transforms //activeAccum = nextAccum.accum; nextAccum = this._makeNextAccum( activeAccum.getParentCB('sibling'), s ); // don't trigger activeAccum switch / _makeNextAccum call below s.res.async = false; this.env.dp( 'workStack', s.c, revTokens.rank, workStack ); } } // Abort processing for this token token = null; break; } if ( token !== null ) { // token is done. // push to accumulator //console.warn( 'pushing ' + token ); activeAccum.push( token ); } if ( s.res.async ) { this.env.dp( 'res.async, creating new TokenAccumulator', s.c ); // The child now switched to activeAccum, we have to create a new // accumulator for the next potential child. activeAccum = nextAccum.accum; nextAccum = this._makeNextAccum( activeAccum.getParentCB('sibling'), s ); } } } // we are no longer transforming, maybeSyncReturn needs to follow the // async code path s.transforming = false; // All tokens in localAccum are fully processed localAccum.rank = this.phaseEndRank; this.env.dp( 'localAccum', activeAccum !== localAccum ? 'async' : 'sync', s.c, localAccum ); // Return finished tokens directly to caller, and indicate if further // async actions are outstanding. The caller needs to point a sibling to // the returned accumulator, or call .siblingDone() to mark the end of a // chain. var retAccum = activeAccum !== localAccum ? activeAccum : null; return { tokens: localAccum, async: retAccum }; }; /** * Callback for async transforms * * Converts direct callbacks into a synchronous return by collecting the * results in s.res. Re-start transformTokens for any async returns, and calls * the provided asyncCB (TokenAccumulator._returnTokens normally). */ AsyncTokenTransformManager.prototype.maybeSyncReturn = function ( s, cbs, ret ) { if ( s.transforming ) { if ( ! ret ) { // support empty callbacks, for simple async signalling s.res.async = true; return; } // transformTokens is still ongoing, handle as sync return by // collecting the results in s.res this.env.dp( 'maybeSyncReturn transforming', s.c, ret ); if ( ret.tokens ) { if ( ret.tokens.constructor !== Array ) { ret.tokens = [ ret.tokens ]; } if ( s.res.tokens ) { var oldRank = s.res.tokens.rank; s.res.tokens = s.res.tokens.concat( ret.tokens ); if ( oldRank && ret.tokens.rank ) { // Conservatively set the overall rank to the minimum. // This assumes that multi-pass expansion for some tokens // is safe. We might want to revisit that later. Math.min( oldRank, ret.tokens.rank ); } s.res.async = ret.async; } else { s.res = ret; } } else if ( ret.constructor === Array ) { console.trace(); } s.res.async = ret.async; //console.trace(); } else if ( ret !== undefined ) { // Since the original transformTokens call is already done, we have to // re-start application of any remaining transforms here. this.env.dp( 'maybeSyncReturn async', s.c, ret ); var asyncCB = cbs.parent, tokens = ret.tokens; if ( tokens && tokens.length && ( ! tokens.rank || tokens.rank < this.phaseEndRank ) && ! ( tokens.length === 1 && tokens[0].constructor === String ) ) { // Re-process incomplete tokens this.env.dp( 'maybeSyncReturn: recursive transformTokens', this.frame.title, ret.tokens ); // Set up a new child callback with its own callback state var _cbs = { async: cbs.parent }, childCB = this.maybeSyncReturn.bind( this, s, _cbs ); _cbs.self = childCB; var res = this.transformTokens( ret.tokens, childCB ); ret.tokens = res.tokens; if ( res.async ) { // Insert new child accumulator chain- any further chunks from // the transform will be passed as sibling to the last accum // in this chain, and the new chain will pass its results to // the former parent accumulator. if ( ! ret.async ) { // There will be no more input to the child pipeline res.async.siblingDone(); // We need to indicate that more results will follow from // the child pipeline. ret.async = true; } else { // More tokens will follow from original expand. // Need to return results of recursive expand *before* further // async results, so we simply pass further results to the // last accumulator in the new chain. cbs.parent = res.async.getParentCB( 'sibling' ); } } } asyncCB( ret ); if ( ret.async ) { // Pass reference to maybeSyncReturn to TokenAccumulators to allow // them to call directly return cbs.self; } } }; /*************** In-order, synchronous transformer (phase 1 and 3) ***************/ /** * Subclass for phase 3, in-order and synchronous processing. * * @class * @constructor * @param {Object} environment. */ function SyncTokenTransformManager ( env, isInclude, pipeFactory, phaseEndRank, attributeType ) { this.env = env; this.isInclude = isInclude; this.pipeFactory = pipeFactory; this.phaseEndRank = phaseEndRank; this.attributeType = attributeType; this._construct(); } // Inherit from TokenTransformManager, and thus also from EventEmitter. SyncTokenTransformManager.prototype = new TokenTransformManager(); SyncTokenTransformManager.prototype.constructor = SyncTokenTransformManager; SyncTokenTransformManager.prototype.process = function ( tokens ) { if ( ! $.isArray ( tokens ) ) { tokens = [tokens]; } this.onChunk( tokens ); //console.warn( JSON.stringify( this.transformers ) ) this.onEndEvent(); }; /** * Global in-order and synchronous traversal on token stream. Emits * transformed chunks of tokens in the 'chunk' event. * * @method * @param {Array} Token chunk. */ SyncTokenTransformManager.prototype.onChunk = function ( tokens ) { this.env.dp( 'SyncTokenTransformManager.onChunk, input: ', tokens ); var res, localAccum = [], localAccumLength = 0, workStack = [], // stack of stacks of tokens returned from transforms // to process before consuming the next input token token, // Top-level frame only in phase 3, as everything is already expanded. ts, transformer, aborted, minRank; for ( var i = 0, l = tokens.length; i < l || workStack.length; i++ ) { aborted = false; if ( workStack.length ) { i--; var curChunk = workStack[workStack.length - 1]; minRank = curChunk.rank || tokens.rank || this.phaseEndRank - 1; token = curChunk.pop(); if ( !curChunk.length ) { // remove empty chunk workStack.pop(); } } else { token = tokens[i]; minRank = tokens.rank || this.phaseEndRank - 1; } res = { token: token }; ts = this._getTransforms( token, minRank ); //this.env.dp( 'sync tok:', minRank, token.rank, token, ts ); for (var j = 0, lts = ts.length; j < lts; j++ ) { transformer = ts[j]; // Transform the token. res = transformer.transform( token, this, this.prevToken ); //this.env.dp( 'sync res0:', res ); if ( res.token !== token ) { aborted = true; if ( res.token ) { res.tokens = [res.token]; delete res.token; } break; } token = res.token; } //this.env.dp( 'sync res:', res ); if( res.tokens && res.tokens.length ) { // Splice in the returned tokens (while replacing the original // token), and process them next. var revTokens = res.tokens.slice(); revTokens.reverse(); revTokens.rank = transformer.rank; workStack.push( revTokens ); } else if ( res.token ) { localAccum.push(res.token); this.prevToken = res.token; } else { this.prevToken = token; } } localAccum.rank = this.phaseEndRank; localAccum.cache = tokens.cache; this.env.dp( 'SyncTokenTransformManager.onChunk: emitting ', localAccum ); this.emit( 'chunk', localAccum ); }; /** * Callback for the end event emitted from the tokenizer. * Either signals the end of input to the tail of an ongoing asynchronous * processing pipeline, or directly emits 'end' if the processing was fully * synchronous. */ SyncTokenTransformManager.prototype.onEndEvent = function () { // This phase is fully synchronous, so just pass the end along and prepare // for the next round. this.emit('end'); }; /********************** AttributeTransformManager *************************/ /** * Utility transformation manager for attributes, using an attribute * transformation pipeline (normally phase1 SyncTokenTransformManager and * phase2 AsyncTokenTransformManager). This pipeline needs to be independent * of the containing TokenTransformManager to isolate transforms from each * other. The AttributeTransformManager returns its result by calling the * supplied callback. * * @class * @constructor * @param {Object} Containing AsyncTokenTransformManager * @param {Function} Callback function, called with expanded attribute array. */ function AttributeTransformManager ( manager, callback ) { this.manager = manager; this.frame = this.manager.frame; this.callback = callback; this.outstanding = 1; this.kvs = []; //this.pipe = manager.getAttributePipeline( manager.args ); } // A few constants AttributeTransformManager.prototype._toType = 'tokens/x-mediawiki/expanded'; /** * Expand both key and values of all key/value pairs. Used for generic * (non-template) tokens in the AttributeExpander handler, which runs after * templates are already expanded. */ AttributeTransformManager.prototype.process = function ( attributes ) { var pipe, ref; //console.warn( 'AttributeTransformManager.process: ' + JSON.stringify( attributes ) ); // transform each argument (key and value), and handle asynchronous returns for ( var i = 0, l = attributes.length; i < l; i++ ) { var cur = attributes[i]; // fast path for string-only attributes if ( cur.k.constructor === String && cur.v.constructor === String ) { this.kvs.push( cur ); continue; } var kv = new KV( [], [] ); this.kvs.push( kv ); if ( cur.v.constructor === Array && cur.v.length ) { // Assume that the return is async, will be decremented in callback this.outstanding++; // transform the value this.frame.expand( cur.v, { type: this._toType, cb: this._returnAttributeValue.bind( this, i ) } ); } else { kv.v = cur.v; } if ( cur.k.constructor === Array && cur.k.length ) { // Assume that the return is async, will be decremented in callback this.outstanding++; // transform the key this.frame.expand( cur.k, { type: this._toType, cb: this._returnAttributeKey.bind( this, i ) } ); } else { kv.k = cur.k; } } this.outstanding--; if ( this.outstanding === 0 ) { // synchronous, done this.callback( this.kvs ); } }; /** * Expand only keys of key/value pairs. This is generally used for template * parameters to avoid expanding unused values, which is very important for * constructs like switches. */ AttributeTransformManager.prototype.processKeys = function ( attributes ) { var pipe, ref; //console.warn( 'AttributeTransformManager.process: ' + JSON.stringify( attributes ) ); // TODO: wrap in chunk and call // .get( { type: 'text/x-mediawiki/expanded' } ) on it // transform the key for each attribute pair var kv; for ( var i = 0, l = attributes.length; i < l; i++ ) { var cur = attributes[i]; // fast path for string-only attributes if ( cur.k.constructor === String && cur.v.constructor === String ) { kv = new KV( cur.k, this.frame.newParserValue( cur.v ) ); this.kvs.push( kv ); continue; } // Wrap the value in a ParserValue for lazy expansion kv = new KV( [], this.frame.newParserValue( cur.v ) ); this.kvs.push( kv ); // And expand the key, if needed if ( cur.k.constructor === Array && cur.k.length && ! cur.k.get ) { // Assume that the return is async, will be decremented in callback this.outstanding++; // transform the key this.frame.expand( cur.k, { type: this._toType, cb: this._returnAttributeKey.bind( this, i ) } ); } else { kv.k = cur.k; } } this.outstanding--; if ( this.outstanding === 0 ) { // synchronously done this.callback( this.kvs ); } }; /** * Only expand values of attribute key/value pairs. */ AttributeTransformManager.prototype.processValues = function ( attributes ) { // Potentially need to use multiple pipelines to support concurrent async expansion //this.pipe.process( var pipe, ref; //console.warn( 'AttributeTransformManager.process: ' + JSON.stringify( attributes ) ); // transform each value for ( var i = 0, l = attributes.length; i < l; i++ ) { var cur = attributes[i]; var kv = new KV( cur.k, [] ); this.kvs.push( kv ); if ( ! cur ) { console.warn( JSON.stringify( attributes ) ); console.trace(); continue; } if ( cur.v.constructor === Array && cur.v.length ) { // Assume that the return is async, will be decremented in callback this.outstanding++; // transform the value this.frame.expand( cur.v, { type: this._toType, cb: this._returnAttributeValue.bind( this, i ) } ); } else { kv.value = cur.v; } } this.outstanding--; if ( this.outstanding === 0 ) { // synchronously done this.callback( this.kvs ); } }; /** * Callback for async argument value expansions */ AttributeTransformManager.prototype._returnAttributeValue = function ( ref, tokens ) { this.manager.env.dp( 'check _returnAttributeValue: ', ref, tokens ); this.kvs[ref].v = tokens; this.kvs[ref].v = this.manager.env.stripEOFTkfromTokens( this.kvs[ref].v ); this.outstanding--; if ( this.outstanding === 0 ) { this.callback( this.kvs ); } }; /** * Callback for async argument key expansions */ AttributeTransformManager.prototype._returnAttributeKey = function ( ref, tokens ) { //console.warn( 'check _returnAttributeKey: ' + JSON.stringify( tokens ) ); this.kvs[ref].k = tokens; this.kvs[ref].k = this.manager.env.stripEOFTkfromTokens( this.kvs[ref].k ); if ( this.kvs[ref].v === '' ) { // FIXME: use tokenizer production to properly parse this var m = this.manager.env.tokensToString( this.kvs[ref].k ) .match( /([^=]+)=['"]?([^'"]*)['"]?$/ ); if ( m ) { this.kvs[ref].k = m[1]; this.kvs[ref].v = m[2]; //console.warn( m + JSON.stringify( this.kvs[ref] ) ); } } this.outstanding--; if ( this.outstanding === 0 ) { this.callback( this.kvs ); } }; /******************************* TokenAccumulator *************************/ /** * Token accumulators buffer tokens between asynchronous processing points, * and return fully processed token chunks in-order and as soon as possible. * They support the AsyncTokenTransformManager. * * @class * @constructor * @param {Object} next TokenAccumulator to link to * @param {Array} (optional) tokens, init accumulator with tokens or [] */ function TokenAccumulator ( manager, parentCB ) { this.manager = manager; this.parentCB = parentCB; this.accum = []; // Wait for child and sibling by default // Note: Need to decrement outstanding on last accum // in a chain. this.outstanding = 2; } /** * Curry a parentCB with the object and reference. * * @method * @param {Object} TokenAccumulator * @param {misc} Reference / key for callback * @returns {Function} */ TokenAccumulator.prototype.getParentCB = function ( reference ) { return this._returnTokens.bind( this, reference ); }; TokenAccumulator.prototype.setParentCB = function ( cb ) { this.parentCB = cb; }; /** * Pass tokens to an accumulator * * @method * @param {String}: reference, 'child' or 'sibling'. * @param {Object}: { tokens, async } * @returns {Mixed}: new parent callback for caller or falsy value */ TokenAccumulator.prototype._returnTokens = function ( reference, ret ) { var cb, returnTokens = []; if ( ! ret.async ) { this.outstanding--; } this.manager.env.dp( 'TokenAccumulator._returnTokens', reference, ret ); // FIXME if ( ret.tokens === undefined ) { if ( this.manager.env.debug ) { console.warn( 'ret.tokens undefined: ' + JSON.stringify( ret ) ); console.trace(); } if ( ret.token !== undefined ) { ret.tokens = [ret.token]; } else { ret.tokens = []; } } if ( reference === 'child' ) { var res = {}; if ( !ret.async ) { // empty accum too ret.tokens = ret.tokens.concat( this.accum ); this.accum = []; } //this.manager.env.dp( 'TokenAccumulator._returnTokens child: ', // tokens, ' outstanding: ', this.outstanding ); ret.tokens.rank = this.manager.phaseEndRank; ret.async = this.outstanding; this._callParentCB( ret ); return null; } else { // sibling if ( this.outstanding === 0 ) { ret.tokens = this.accum.concat( ret.tokens ); // A sibling has already transformed child tokens, so we don't // have to do this again. //this.manager.env.dp( 'TokenAccumulator._returnTokens: ', // 'sibling done and parentCB ', // tokens ); ret.tokens.rank = this.manager.phaseEndRank; ret.async = false; this.parentCB( ret ); return null; } else if ( this.outstanding === 1 && ret.async ) { // Sibling is not yet done, but child is. Return own parentCB to // allow the sibling to go direct, and call back parent with // tokens. The internal accumulator is empty at this stage, as its // tokens are passed to the parent when the child is done. ret.tokens.rank = this.manager.phaseEndRank; return this._callParentCB( ret ); } else { // sibling tokens are always fully processed for this phase, so we // can directly concatenate them here. this.accum = this.accum.concat( ret.tokens ); this.manager.env.dp( 'TokenAccumulator._returnTokens: sibling done, but not overall. async=', ret.async, ', this.outstanding=', this.outstanding, ', this.accum=', this.accum, ' frame.title=', this.manager.frame.title ); } } }; /** * Mark the sibling as done (normally at the tail of a chain). */ TokenAccumulator.prototype.siblingDone = function () { //console.warn( 'TokenAccumulator.siblingDone: ' ); this._returnTokens ( 'sibling', { tokens: [], async: false } ); }; TokenAccumulator.prototype._callParentCB = function ( ret ) { var cb = this.parentCB( ret ); if ( cb ) { this.parentCB = cb; } return this.parentCB; }; /** * Push a token into the accumulator * * @method * @param {Object} token */ TokenAccumulator.prototype.push = function ( token ) { return this.accum.push(token); }; /** * Append tokens to an accumulator * * @method * @param {Object} token */ TokenAccumulator.prototype.append = function ( token ) { this.accum = this.accum.concat( token ); }; /******************************* Frame ******************************/ /** * The Frame object * * A frame represents a template expansion scope including parameters passed * to the template (args). It provides a generic 'expand' method which * expands / converts individual parameter values in its scope. It also * provides methods to check if another expansion would lead to loops or * exceed the maximum expansion depth. */ function Frame ( title, manager, args, parentFrame ) { this.title = title; this.manager = manager; this.args = new Params( this.manager.env, args ); // cache key fragment for expansion cache // FIXME: better use fully expand args for the cache key! Can avoid using // the parent cache keys in that case. var MD5 = new jshashes.MD5(); if ( args._cacheKey === undefined ) { args._cacheKey = MD5.hex( JSON.stringify( args ) ); } if ( parentFrame ) { this.parentFrame = parentFrame; this.depth = parentFrame.depth + 1; this._cacheKey = MD5.hex( parentFrame._cacheKey + args._cacheKey ); } else { this.parentFrame = null; this.depth = 0; this._cacheKey = args._cacheKey; } } /** * Create a new child frame */ Frame.prototype.newChild = function ( title, manager, args ) { return new Frame( title, manager, args, this ); }; /** * Expand / convert a thunk (a chunk of tokens not yet fully expanded). * * XXX: support different input formats, expansion phases / flags and more * output formats. */ Frame.prototype.expand = function ( chunk, options ) { var outType = options.type || 'text/x-mediawiki/expanded'; var cb = options.cb || console.warn( JSON.stringify( options ) ); this.manager.env.dp( 'Frame.expand', this._cacheKey, chunk ); if ( chunk.constructor === String ) { // Plain text remains text. Nothing to do. if ( outType !== 'text/x-mediawiki/expanded' ) { return cb( [ chunk ] ); } else { return cb( chunk ); } } else if ( chunk.constructor === ParserValue ) { // Delegate to ParserValue return chunk.get( options ); } // We are now dealing with an Array of tokens. See if the chunk is // a source chunk with a cache attached. var maybeCached; if ( ! chunk.length ) { // nothing to do, simulate a cache hit.. maybeCached = chunk; } else if ( chunk.cache === undefined ) { // add a cache to the chunk Object.defineProperty( chunk, 'cache', // XXX: play with cache size! { value: new ExpansionCache( 5 ), enumerable: false } ); } else { // try to retrieve cached expansion maybeCached = chunk.cache.get( this, options ); // XXX: disable caching of error messages! } if ( maybeCached ) { this.manager.env.dp( 'got cache', this.title, this._cacheKey, maybeCached ); return cb( maybeCached ); } // not cached, actually have to do some work. if ( outType === 'text/x-mediawiki/expanded' ) { // Simply wrap normal expansion ;) // XXX: Integrate this into the pipeline setup? outType = 'tokens/x-mediawiki/expanded'; var self = this, origCB = cb; cb = function( resChunk ) { var res = self.manager.env.tokensToString( resChunk ); // cache the result chunk.cache.set( self, options, res ); origCB( res ); }; } // XXX: Should perhaps create a generic from..to conversion map in // mediawiki.parser.js, at least for forward conversions. if ( outType === 'tokens/x-mediawiki/expanded' ) { if ( options.asyncCB ) { // Signal (potentially) asynchronous expansion to parent. options.asyncCB( ); } var pipeline = this.manager.pipeFactory.getPipeline( // XXX: use input type this.manager.attributeType || 'tokens/x-mediawiki', true ); pipeline.setFrame( this, null ); // In the name of interface simplicity, we accumulate all emitted // chunks in a single accumulator. var eventState = { cache: chunk.cache, options: options, accum: [], cb: cb }; pipeline.addListener( 'chunk', this.onThunkEvent.bind( this, eventState, true ) ); pipeline.addListener( 'end', this.onThunkEvent.bind( this, eventState, false ) ); if ( chunk[chunk.length - 1].constructor === EOFTk ) { pipeline.process( chunk, this.title ); } else { var newChunk = chunk.concat( this._eofTkList ); newChunk.rank = chunk.rank; pipeline.process( newChunk, this.title ); } } else { throw "Frame.expand: Unsupported output type " + outType; } }; // constant chunk terminator Frame.prototype._eofTkList = [ new EOFTk() ]; /** * Event handler for chunk conversion pipelines */ Frame.prototype.onThunkEvent = function ( state, notYetDone, ret ) { if ( notYetDone ) { state.accum = state.accum.concat(this.manager.env.stripEOFTkfromTokens( ret ) ); this.manager.env.dp( 'Frame.onThunkEvent accum:', this._cacheKey, state.accum ); } else { this.manager.env.dp( 'Frame.onThunkEvent:', this._cacheKey, state.accum ); state.cache.set( this, state.options, state.accum ); // Add cache to accum too Object.defineProperty( state.accum, 'cache', { value: state.cache, enumerable: false } ); state.cb ( state.accum ); } }; /** * Check if expanding would lead to a loop, or would exceed the * maximum expansion depth. * * @method * @param {String} Title to check. */ Frame.prototype.loopAndDepthCheck = function ( title, maxDepth ) { // XXX: set limit really low for testing! //console.warn( 'Loopcheck: ' + title + JSON.stringify( this, null, 2 ) ); if ( this.depth > maxDepth ) { // too deep //console.warn( 'Loopcheck: ' + JSON.stringify( this, null, 2 ) ); return 'Error: Expansion depth limit exceeded at '; } var elem = this; do { //console.warn( 'loop check: ' + title + ' vs ' + elem.title ); if ( elem.title === title ) { // Loop detected return 'Error: Expansion loop detected at '; } elem = elem.parentFrame; } while ( elem ); // No loop detected. return false; }; /** * ParserValue factory * * ParserValues wrap a piece of content that can be retrieved in different * expansion stages and different content types using the get() method. * Content types currently include 'tokens/x-mediawiki/expanded' for * pre-processed tokens and 'text/x-mediawiki/expanded' for pre-processed * wikitext. */ Frame.prototype.newParserValue = function ( source, options ) { // TODO: support more options: // options.type to specify source type // options.phase to specify source expansion stage if ( source.constructor === String ) { source = new String( source ); source.get = this._getID; return source; } else if ( options && options.frame ) { return new ParserValue( source, options.frame ); } else { return new ParserValue( source, this ); } }; Frame.prototype._getID = function( options ) { if ( !options || !options.cb ) { console.trace(); console.warn('Error in Frame._getID: no cb in options!'); } else { //console.warn('getID: ' + options.cb); return options.cb( this ); } }; /** * A specialized expansion cache, normally associated with a chunk of tokens. */ function ExpansionCache ( n ) { this._cache = new LRU( n ); } ExpansionCache.prototype.makeKey = function ( frame, options ) { //console.warn( frame._cacheKey ); return frame._cacheKey + options.type ; }; ExpansionCache.prototype.set = function ( frame, options, value ) { //if ( frame.title !== null ) { //console.log( 'setting cache for ' + frame.title + // ' ' + this.makeKey( frame, options ) + // ' to: ' + JSON.stringify( value ) ); return this._cache.set( this.makeKey( frame, options ), value ); //} }; ExpansionCache.prototype.get = function ( frame, options ) { return this._cache.get( this.makeKey( frame, options ) ); }; if (typeof module == "object") { module.exports.AsyncTokenTransformManager = AsyncTokenTransformManager; module.exports.SyncTokenTransformManager = SyncTokenTransformManager; module.exports.AttributeTransformManager = AttributeTransformManager; }