mediawiki-extensions-Visual.../modules/parser/mediawiki.TokenTransformManager.js
Subramanya Sastry 6999246fec More fixes on the way to readable debug/trace output.
Change-Id: I3ab637b7790c359255b8bc0ad5716da12bb25884
2012-07-20 18:12:37 -05:00

1381 lines
42 KiB
JavaScript

/**
* Token transformation managers with a (mostly) abstract
* TokenTransformManager base class and AsyncTokenTransformManager and
* SyncTokenTransformManager implementation subclasses. Individual
* transformations register for the token types they are interested in and are
* called on each matching token.
*
* Async token transformations are supported by the TokenAccumulator class,
* that manages as-early-as-possible and in-order return of tokens including
* buffering.
*
* See
* https://www.mediawiki.org/wiki/Parsoid/Token_stream_transformations
* for more documentation.
*
* @author Gabriel Wicke <gwicke@wikimedia.org>
*/
var events = require('events'),
LRU = require("lru-cache"),
jshashes = require('jshashes');
/**
* Base class for token transform managers
*
* @class
* @constructor
* @param {Function} callback, a callback function accepting a token list as
* its only argument.
*/
function TokenTransformManager( env, isInclude, pipeFactory, phaseEndRank, attributeType ) {
// Separate the constructor, so that we can call it from subclasses.
this._construct();
}
function tokenTransformersKey(tkType, tagName) {
return (tkType === 'tag') ? "tag:" + tagName.toLowerCase() : tkType;
}
// Map of: token constructor ==> transfomer type
// Used for returning active transformers for a token
TokenTransformManager.tkConstructorToTkTypeMap = {
"String" : "text",
"NlTk" : "newline",
"CommentTk" : "comment",
"EOFTk" : "end",
"TagTk" : "tag",
"EndTagTk" : "tag",
"SelfclosingTagTk" : "tag"
};
// Inherit from EventEmitter
TokenTransformManager.prototype = new events.EventEmitter();
TokenTransformManager.prototype.constructor = TokenTransformManager;
TokenTransformManager.prototype._construct = function () {
this.defaultTransformers = []; // any transforms
this.tokenTransformers = {}; // non-any transforms
this.cachedTransformers = {}; // merged any + non-any transforms
};
/**
* Register to a token source, normally the tokenizer.
* The event emitter emits a 'chunk' event with a chunk of tokens,
* and signals the end of tokens by triggering the 'end' event.
* XXX: Perform registration directly in the constructor?
*
* @method
* @param {Object} EventEmitter token even emitter.
*/
TokenTransformManager.prototype.addListenersOn = function ( tokenEmitter ) {
tokenEmitter.addListener('chunk', this.onChunk.bind( this ) );
tokenEmitter.addListener('end', this.onEndEvent.bind( this ) );
};
TokenTransformManager.prototype.setTokensRank = function ( tokens, rank ) {
for ( var i = 0, l = tokens.length; i < l; i++ ) {
tokens[i] = this.env.setTokenRank( rank, tokens[i] );
}
};
/**
* Predicate for sorting transformations by ascending rank.
* */
TokenTransformManager.prototype._cmpTransformations = function ( a, b ) {
return a.rank - b.rank;
};
/**
* Add a transform registration.
*
* @method
* @param {Function} transform.
* @param {Number} rank, [0,3) with [0,1) in-order on input token stream,
* [1,2) out-of-order and [2,3) in-order on output token stream
* @param {String} type, one of 'tag', 'text', 'newline', 'comment', 'end',
* 'martian' (unknown token), 'any' (any token, matched before other matches).
* @param {String} tag name for tags, omitted for non-tags
*/
TokenTransformManager.prototype.addTransform = function ( transformation, rank, type, name ) {
var t = {transform: transformation, rank: rank};
if (type === 'any') {
// Record the any transformation
this.defaultTransformers.push(t);
// clear cache
this.cachedTransformers = {};
} else {
var key = tokenTransformersKey(type, name);
var tArray = this.tokenTransformers[key];
if (!tArray) {
tArray = this.tokenTransformers[key] = [];
}
tArray.push(t);
tArray.sort(this._cmpTransformations);
// clear the relevant cache entry
this.cachedTransformers[key] = null;
}
};
/**
* Remove a transform registration
*
* @method
* @param {Function} transform.
* @param {Number} rank, [0,3) with [0,1) in-order on input token stream,
* [1,2) out-of-order and [2,3) in-order on output token stream
* @param {String} type, one of 'tag', 'text', 'newline', 'comment', 'end',
* 'martian' (unknown token), 'any' (any token, matched before other matches).
* @param {String} tag name for tags, omitted for non-tags
*/
TokenTransformManager.prototype.removeTransform = function ( rank, type, name ) {
function removeMatchingTransform(transformers, rank) {
var i = 0, n = transformers.length;
while (i < n && rank !== transformers[i].rank) {
i++;
}
transformers.splice(i, 1);
}
if (type === 'any') {
// Remove from default transformers
removeMatchingTransform(this.defaultTransformers, rank);
// clear cache
this.cachedTransformers = {};
} else {
var key = tokenTransformersKey(type, name);
var tArray = this.tokenTransformers[key];
if (tArray) {
removeMatchingTransform(tArray, rank);
}
// clear the relevant cache entry
this.cachedTransformers[key] = null;
}
};
/**
* Get all transforms for a given token
*/
TokenTransformManager.prototype._getTransforms = function ( token, minRank ) {
var tkType = TokenTransformManager.tkConstructorToTkTypeMap[token.constructor.name];
var key = tokenTransformersKey(tkType, token.name);
var tts = this.cachedTransformers[key];
if (!tts) {
// generate and cache -- dont cache if there are no default transformers
tts = this.tokenTransformers[key] || [];
if (this.defaultTransformers.length > 0) {
tts = tts.concat(this.defaultTransformers);
tts.sort(this._cmpTransformations);
this.cachedTransformers[key] = tts;
}
}
if ( minRank !== undefined ) {
// skip transforms <= minRank
var i = 0;
for ( l = tts.length; i < l && tts[i].rank <= minRank; i++ ) { }
return ( i && tts.slice( i ) ) || tts;
} else {
return tts;
}
};
/******************** Async token transforms: Phase 2 **********************/
/**
* Asynchronous and potentially out-of-order token transformations, used in phase 2.
*
* return protocol for individual transforms:
* { tokens: [tokens], async: true }: async expansion -> outstanding++ in parent
* { tokens: [tokens] }: fully expanded, tokens will be reprocessed
*
* @class
* @constructor
*/
function AsyncTokenTransformManager ( env, isInclude, pipeFactory, phaseEndRank, attributeType ) {
this.env = env;
this.isInclude = isInclude;
this.pipeFactory = pipeFactory;
this.phaseEndRank = phaseEndRank;
this.attributeType = attributeType;
this.setFrame( null, null, [] );
this._construct();
}
// Inherit from TokenTransformManager, and thus also from EventEmitter.
AsyncTokenTransformManager.prototype = new TokenTransformManager();
AsyncTokenTransformManager.prototype.constructor = AsyncTokenTransformManager;
/**
* Reset the internal token and outstanding-callback state of the
* TokenTransformManager, but keep registrations untouched.
*
* @method
*/
AsyncTokenTransformManager.prototype.setFrame = function ( parentFrame, title, args ) {
this.env.dp( 'AsyncTokenTransformManager.setFrame', title, args );
// First piggy-back some reset action
this.tailAccumulator = null;
// initial top-level callback, emits chunks
this.tokenCB = this.emitChunk.bind( this );
// now actually set up the frame
if (parentFrame) {
if ( title === null ) {
// attribute, simply reuse the parent frame
this.frame = parentFrame;
} else {
this.frame = parentFrame.newChild( title, this, args );
}
} else {
this.frame = new Frame(title, this, args );
}
};
/**
* Callback for async returns from head of TokenAccumulator chain
*/
AsyncTokenTransformManager.prototype.emitChunk = function( ret ) {
this.env.dp( 'emitChunk', ret );
this.emit( 'chunk', ret.tokens );
if ( ! ret.async ) {
this.emit('end');
// NOTE: This is a dummy return. We are entering async mode and
// there is no caller waiting to consume this return value.
// This is present here for parity with the return on the other branch
// and not confuse anyone wondering if there is a missing return here.
return;
} else {
// allow accumulators to go direct
return this.emitChunk.bind( this );
}
};
/**
* Simple wrapper that processes all tokens passed in
*/
AsyncTokenTransformManager.prototype.process = function ( tokens ) {
if ( ! $.isArray ( tokens ) ) {
tokens = [tokens];
}
this.onChunk( tokens );
this.onEndEvent();
};
/**
* Transform and expand tokens. Transformed token chunks will be emitted in
* the 'chunk' event.
*
* @method
* @param {Array} chunk of tokens
*/
AsyncTokenTransformManager.prototype.onChunk = function ( tokens ) {
this.env.tracer.startPass("onChunk (Async:" + this.attributeType + ")");
// Set top-level callback to next transform phase
var res = this.transformTokens ( tokens, this.tokenCB );
this.env.dp( 'AsyncTokenTransformManager onChunk', res.async? 'async' : 'sync', res.tokens );
// Emit or append the returned tokens
if ( ! this.tailAccumulator ) {
this.env.dp( 'emitting' );
this.emit( 'chunk', res.tokens );
} else {
this.env.dp( 'appending to tail' );
this.tailAccumulator.append( res.tokens );
}
// Update the tail of the current accumulator chain
if ( res.async ) {
this.tailAccumulator = res.async;
this.tokenCB = res.async.getParentCB ( 'sibling' );
}
this.env.tracer.endPass("onChunk (Async:" + this.attributeType + ")");
};
/**
* Callback for the end event emitted from the tokenizer.
* Either signals the end of input to the tail of an ongoing asynchronous
* processing pipeline, or directly emits 'end' if the processing was fully
* synchronous.
*/
AsyncTokenTransformManager.prototype.onEndEvent = function () {
if ( this.tailAccumulator ) {
this.env.dp( 'AsyncTokenTransformManager.onEndEvent: calling siblingDone',
this.frame.title );
this.tailAccumulator.siblingDone();
} else {
// nothing was asynchronous, so we'll have to emit end here.
this.env.dp( 'AsyncTokenTransformManager.onEndEvent: synchronous done',
this.frame.title );
this.emit('end');
//this._reset();
}
};
/**
* Utility method to set up a new TokenAccumulator with the right callbacks.
*/
AsyncTokenTransformManager.prototype._makeNextAccum = function( cb, state ) {
var newAccum = new TokenAccumulator( this, cb );
var _cbs = { parent: newAccum.getParentCB( 'child' ) };
var newCB = this.maybeSyncReturn.bind( this, state, _cbs );
_cbs.self = newCB;
return { accum: newAccum, cb: newCB };
};
// Debug counter, provides an UID for transformTokens calls so that callbacks
// associated with it can be identified in debugging output as c-XXX.
AsyncTokenTransformManager.prototype._counter = 0;
/**
* Run asynchronous transformations. This is the big workhorse where
* templates, images, links and other async expansions (see the transform
* recipe mediawiki.parser.js) are processed.
*
* @param tokens {Array}: Chunk of tokens, potentially with rank and other
* meta information associated with it.
* @param parentCB {Function}: callback for asynchronous results
* @returns {Object}: { tokens: [], async: falsy or the tail TokenAccumulator }
* The returned chunk is fully expanded for this phase, and the rank set
* to reflect this.
*/
AsyncTokenTransformManager.prototype.transformTokens = function ( tokens, parentCB ) {
//console.warn('AsyncTokenTransformManager.transformTokens: ' + JSON.stringify(tokens) );
var inputRank = tokens.rank || 0,
localAccum = [], // a local accum for synchronously returned fully processed tokens
activeAccum = localAccum, // start out collecting tokens in localAccum
// until the first async transform is hit
workStack = [], // stack of stacks (reversed chunks) of tokens returned
// from transforms to process before consuming the next
// input token
token, // currently processed token
s = { // Shared state accessible to synchronous transforms in this.maybeSyncReturn
transforming: true,
res: {},
// debug id for this expansion
c: 'c-' + AsyncTokenTransformManager.prototype._counter++
},
minRank;
// make localAccum compatible with getParentCB('sibling')
localAccum.getParentCB = function() { return parentCB; };
var nextAccum = this._makeNextAccum( parentCB, s );
var i = 0,
l = tokens.length;
while ( i < l || workStack.length ) {
if ( workStack.length ) {
var curChunk = workStack[workStack.length - 1];
minRank = curChunk.rank || inputRank;
token = curChunk.pop();
if ( !curChunk.length ) {
// activate nextActiveAccum after consuming the chunk
if ( curChunk.nextActiveAccum ) {
if ( activeAccum !== curChunk.oldActiveAccum ) {
// update the callback of the next active accum
curChunk.nextActiveAccum.setParentCB( activeAccum.getParentCB('sibling') );
}
activeAccum = curChunk.nextActiveAccum;
// create new accum and cb for transforms
nextAccum = this._makeNextAccum( activeAccum.getParentCB('sibling'), s );
}
// remove empty chunk from workstack
workStack.pop();
}
} else {
token = tokens[i];
i++;
minRank = inputRank;
if ( token.constructor === Array ) {
if ( ! token.length ) {
// skip it
} else if ( ! token.rank || token.rank < this.phaseEndRank ) {
workStack.push( token );
} else {
// don't process the array in this phase.
activeAccum.push( token );
}
continue;
} else if ( token.constructor === ParserValue ) {
// Parser functions etc that run before full attribute
// expansion are responsible for the full expansion of
// returned attributes in their respective environments.
throw( 'Unexpected ParserValue in AsyncTokenTransformManager.transformTokens:' +
JSON.stringify( token ) );
}
}
this.env.tracer.processToken(token);
var ts = this._getTransforms( token, minRank );
//this.env.dp( 'async token:', s.c, token, minRank, ts );
if ( ! ts.length ) {
// nothing to do for this token
activeAccum.push( token );
} else {
//this.env.tp( 'async trans' );
for (var j = 0, lts = ts.length; j < lts; j++ ) {
var transformer = ts[j];
s.res = { };
// Transform the token.
transformer.transform( token, this.frame, nextAccum.cb );
var resTokens = s.res.tokens;
//this.env.dp( 's.res:', s.c, s.res );
// Check the result, which is changed using the
// maybeSyncReturn callback
if ( resTokens && resTokens.length ) {
if ( resTokens.length === 1 ) {
if ( resTokens[0] === undefined ) {
console.warn('transformer ' + transformer.rank +
' returned undefined token!');
resTokens.shift();
break;
}
if ( token === resTokens[0] && ! resTokens.rank ) {
// token not modified, continue with
// transforms.
token = resTokens[0];
continue;
} else if (
resTokens.rank === this.phaseEndRank ||
( resTokens[0].constructor === String &&
! this.tokenTransformers.text ) )
{
// Fast path for text token, and nothing to do for it
// Abort processing, but treat token as done.
token = resTokens[0];
break;
}
}
// token(s) were potentially modified
if ( ! resTokens.rank || resTokens.rank < this.phaseEndRank ) {
// There might still be something to do for these
// tokens. Prepare them for the workStack.
var revTokens = resTokens.slice();
revTokens.reverse();
// Don't apply earlier transforms to results of a
// transformer to avoid loops and keep the
// execution model sane.
revTokens.rank = resTokens.rank || transformer.rank;
//revTokens.rank = Math.max( resTokens.rank || 0, transformer.rank );
revTokens.oldActiveAccum = activeAccum;
workStack.push( revTokens );
if ( s.res.async ) {
revTokens.nextActiveAccum = nextAccum.accum;
}
// create new accum and cb for transforms
//activeAccum = nextAccum.accum;
nextAccum = this._makeNextAccum( activeAccum.getParentCB('sibling'), s );
// don't trigger activeAccum switch / _makeNextAccum call below
s.res.async = false;
this.env.dp( 'workStack', s.c, revTokens.rank, workStack );
}
}
// Abort processing for this token
token = null;
break;
}
if ( token !== null ) {
// token is done.
// push to accumulator
//console.warn( 'pushing ' + token );
activeAccum.push( token );
}
if ( s.res.async ) {
this.env.dp( 'res.async, creating new TokenAccumulator', s.c );
// The child now switched to activeAccum, we have to create a new
// accumulator for the next potential child.
activeAccum = nextAccum.accum;
nextAccum = this._makeNextAccum( activeAccum.getParentCB('sibling'), s );
}
}
}
// we are no longer transforming, maybeSyncReturn needs to follow the
// async code path
s.transforming = false;
// All tokens in localAccum are fully processed
localAccum.rank = this.phaseEndRank;
this.env.dp( 'localAccum',
activeAccum !== localAccum ? 'async' : 'sync',
s.c,
localAccum );
// Return finished tokens directly to caller, and indicate if further
// async actions are outstanding. The caller needs to point a sibling to
// the returned accumulator, or call .siblingDone() to mark the end of a
// chain.
var retAccum = activeAccum !== localAccum ? activeAccum : null;
return { tokens: localAccum, async: retAccum };
};
/**
* Callback for async transforms
*
* Converts direct callbacks into a synchronous return by collecting the
* results in s.res. Re-start transformTokens for any async returns, and calls
* the provided asyncCB (TokenAccumulator._returnTokens normally).
*/
AsyncTokenTransformManager.prototype.maybeSyncReturn = function ( s, cbs, ret ) {
if ( s.transforming ) {
if ( ! ret ) {
// support empty callbacks, for simple async signalling
s.res.async = true;
return;
}
// transformTokens is still ongoing, handle as sync return by
// collecting the results in s.res
this.env.dp( 'maybeSyncReturn transforming', s.c, ret );
if ( ret.tokens ) {
if ( ret.tokens.constructor !== Array ) {
ret.tokens = [ ret.tokens ];
}
if ( s.res.tokens ) {
var oldRank = s.res.tokens.rank;
s.res.tokens = s.res.tokens.concat( ret.tokens );
if ( oldRank && ret.tokens.rank ) {
// Conservatively set the overall rank to the minimum.
// This assumes that multi-pass expansion for some tokens
// is safe. We might want to revisit that later.
Math.min( oldRank, ret.tokens.rank );
}
s.res.async = ret.async;
} else {
s.res = ret;
}
} else if ( ret.constructor === Array ) {
console.trace();
}
s.res.async = ret.async;
//console.trace();
} else if ( ret !== undefined ) {
// Since the original transformTokens call is already done, we have to
// re-start application of any remaining transforms here.
this.env.dp( 'maybeSyncReturn async', s.c, ret );
var asyncCB = cbs.parent,
tokens = ret.tokens;
if ( tokens && tokens.length &&
( ! tokens.rank || tokens.rank < this.phaseEndRank ) &&
! ( tokens.length === 1 && tokens[0].constructor === String ) ) {
// Re-process incomplete tokens
this.env.dp( 'maybeSyncReturn: recursive transformTokens',
this.frame.title, ret.tokens );
// Set up a new child callback with its own callback state
var _cbs = { async: cbs.parent },
childCB = this.maybeSyncReturn.bind( this, s, _cbs );
_cbs.self = childCB;
var res = this.transformTokens( ret.tokens, childCB );
ret.tokens = res.tokens;
if ( res.async ) {
// Insert new child accumulator chain- any further chunks from
// the transform will be passed as sibling to the last accum
// in this chain, and the new chain will pass its results to
// the former parent accumulator.
if ( ! ret.async ) {
// There will be no more input to the child pipeline
res.async.siblingDone();
// We need to indicate that more results will follow from
// the child pipeline.
ret.async = true;
} else {
// More tokens will follow from original expand.
// Need to return results of recursive expand *before* further
// async results, so we simply pass further results to the
// last accumulator in the new chain.
cbs.parent = res.async.getParentCB( 'sibling' );
}
}
}
asyncCB( ret );
if ( ret.async ) {
// Pass reference to maybeSyncReturn to TokenAccumulators to allow
// them to call directly
return cbs.self;
}
}
};
/*************** In-order, synchronous transformer (phase 1 and 3) ***************/
/**
* Subclass for phase 3, in-order and synchronous processing.
*
* @class
* @constructor
* @param {Object} environment.
*/
function SyncTokenTransformManager ( env, isInclude, pipeFactory, phaseEndRank, attributeType ) {
this.env = env;
this.isInclude = isInclude;
this.pipeFactory = pipeFactory;
this.phaseEndRank = phaseEndRank;
this.attributeType = attributeType;
this._construct();
}
// Inherit from TokenTransformManager, and thus also from EventEmitter.
SyncTokenTransformManager.prototype = new TokenTransformManager();
SyncTokenTransformManager.prototype.constructor = SyncTokenTransformManager;
SyncTokenTransformManager.prototype.process = function ( tokens ) {
if ( ! $.isArray ( tokens ) ) {
tokens = [tokens];
}
this.onChunk( tokens );
this.onEndEvent();
};
/**
* Global in-order and synchronous traversal on token stream. Emits
* transformed chunks of tokens in the 'chunk' event.
*
* @method
* @param {Array} Token chunk.
*/
SyncTokenTransformManager.prototype.onChunk = function ( tokens ) {
this.env.tracer.startPass("onChunk (Sync:" + this.attributeType + ")");
this.env.dp( 'SyncTokenTransformManager.onChunk, input: ', tokens );
var res,
localAccum = [],
localAccumLength = 0,
workStack = [], // stack of stacks of tokens returned from transforms
// to process before consuming the next input token
token,
// Top-level frame only in phase 3, as everything is already expanded.
ts, transformer,
aborted, minRank;
for ( var i = 0, l = tokens.length; i < l || workStack.length; i++ ) {
aborted = false;
if ( workStack.length ) {
i--;
var curChunk = workStack[workStack.length - 1];
minRank = curChunk.rank || tokens.rank || this.phaseEndRank - 1;
token = curChunk.pop();
if ( !curChunk.length ) {
// remove empty chunk
workStack.pop();
}
} else {
token = tokens[i];
minRank = tokens.rank || this.phaseEndRank - 1;
}
this.env.tracer.processToken(token);
res = { token: token };
ts = this._getTransforms( token, minRank );
//this.env.dp( 'sync tok:', minRank, token.rank, token, ts );
for (var j = 0, lts = ts.length; j < lts; j++ ) {
transformer = ts[j];
// Transform the token.
res = transformer.transform( token, this, this.prevToken );
//this.env.dp( 'sync res0:', res );
if ( res.token !== token ) {
aborted = true;
if ( res.token ) {
res.tokens = [res.token];
delete res.token;
}
break;
}
token = res.token;
}
//this.env.dp( 'sync res:', res );
if( res.tokens && res.tokens.length ) {
// Splice in the returned tokens (while replacing the original
// token), and process them next.
var revTokens = res.tokens.slice();
revTokens.reverse();
revTokens.rank = transformer.rank;
workStack.push( revTokens );
} else if ( res.token ) {
localAccum.push(res.token);
this.prevToken = res.token;
} else {
this.prevToken = token;
}
}
localAccum.rank = this.phaseEndRank;
localAccum.cache = tokens.cache;
this.env.dp( 'SyncTokenTransformManager.onChunk: emitting ', localAccum );
this.env.tracer.endPass("onChunk (Sync:" + this.attributeType + ")");
this.emit( 'chunk', localAccum );
};
/**
* Callback for the end event emitted from the tokenizer.
* Either signals the end of input to the tail of an ongoing asynchronous
* processing pipeline, or directly emits 'end' if the processing was fully
* synchronous.
*/
SyncTokenTransformManager.prototype.onEndEvent = function () {
// This phase is fully synchronous, so just pass the end along and prepare
// for the next round.
this.emit('end');
};
/********************** AttributeTransformManager *************************/
/**
* Utility transformation manager for attributes, using an attribute
* transformation pipeline (normally phase1 SyncTokenTransformManager and
* phase2 AsyncTokenTransformManager). This pipeline needs to be independent
* of the containing TokenTransformManager to isolate transforms from each
* other. The AttributeTransformManager returns its result by calling the
* supplied callback.
*
* @class
* @constructor
* @param {Object} Containing AsyncTokenTransformManager
* @param {Function} Callback function, called with expanded attribute array.
*/
function AttributeTransformManager ( manager, callback ) {
this.manager = manager;
this.frame = this.manager.frame;
this.callback = callback;
this.outstanding = 1;
this.kvs = [];
//this.pipe = manager.getAttributePipeline( manager.args );
}
// A few constants
AttributeTransformManager.prototype._toType = 'tokens/x-mediawiki/expanded';
/**
* Expand both key and values of all key/value pairs. Used for generic
* (non-template) tokens in the AttributeExpander handler, which runs after
* templates are already expanded.
*/
AttributeTransformManager.prototype.process = function ( attributes ) {
var pipe,
ref;
//console.warn( 'AttributeTransformManager.process: ' + JSON.stringify( attributes ) );
// transform each argument (key and value), and handle asynchronous returns
for ( var i = 0, l = attributes.length; i < l; i++ ) {
var cur = attributes[i];
// fast path for string-only attributes
if ( cur.k.constructor === String && cur.v.constructor === String ) {
this.kvs.push( cur );
continue;
}
var kv = new KV( [], [] );
this.kvs.push( kv );
if ( cur.v.constructor === Array && cur.v.length ) {
// Assume that the return is async, will be decremented in callback
this.outstanding++;
// transform the value
this.frame.expand( cur.v,
{
type: this._toType,
cb: this._returnAttributeValue.bind( this, i )
} );
} else {
kv.v = cur.v;
}
if ( cur.k.constructor === Array && cur.k.length ) {
// Assume that the return is async, will be decremented in callback
this.outstanding++;
// transform the key
this.frame.expand( cur.k,
{
type: this._toType,
cb: this._returnAttributeKey.bind( this, i )
} );
} else {
kv.k = cur.k;
}
}
this.outstanding--;
if ( this.outstanding === 0 ) {
// synchronous, done
this.callback( this.kvs );
}
};
/**
* Expand only keys of key/value pairs. This is generally used for template
* parameters to avoid expanding unused values, which is very important for
* constructs like switches.
*/
AttributeTransformManager.prototype.processKeys = function ( attributes ) {
var pipe,
ref;
//console.warn( 'AttributeTransformManager.process: ' + JSON.stringify( attributes ) );
// TODO: wrap in chunk and call
// .get( { type: 'text/x-mediawiki/expanded' } ) on it
// transform the key for each attribute pair
var kv;
for ( var i = 0, l = attributes.length; i < l; i++ ) {
var cur = attributes[i];
// fast path for string-only attributes
if ( cur.k.constructor === String && cur.v.constructor === String ) {
kv = new KV( cur.k, this.frame.newParserValue( cur.v ) );
this.kvs.push( kv );
continue;
}
// Wrap the value in a ParserValue for lazy expansion
kv = new KV( [], this.frame.newParserValue( cur.v ) );
this.kvs.push( kv );
// And expand the key, if needed
if ( cur.k.constructor === Array && cur.k.length && ! cur.k.get ) {
// Assume that the return is async, will be decremented in callback
this.outstanding++;
// transform the key
this.frame.expand( cur.k,
{
type: this._toType,
cb: this._returnAttributeKey.bind( this, i )
} );
} else {
kv.k = cur.k;
}
}
this.outstanding--;
if ( this.outstanding === 0 ) {
// synchronously done
this.callback( this.kvs );
}
};
/**
* Only expand values of attribute key/value pairs.
*/
AttributeTransformManager.prototype.processValues = function ( attributes ) {
// Potentially need to use multiple pipelines to support concurrent async expansion
//this.pipe.process(
var pipe,
ref;
//console.warn( 'AttributeTransformManager.process: ' + JSON.stringify( attributes ) );
// transform each value
for ( var i = 0, l = attributes.length; i < l; i++ ) {
var cur = attributes[i];
var kv = new KV( cur.k, [] );
this.kvs.push( kv );
if ( ! cur ) {
console.warn( JSON.stringify( attributes ) );
console.trace();
continue;
}
if ( cur.v.constructor === Array && cur.v.length ) {
// Assume that the return is async, will be decremented in callback
this.outstanding++;
// transform the value
this.frame.expand( cur.v,
{
type: this._toType,
cb: this._returnAttributeValue.bind( this, i )
} );
} else {
kv.value = cur.v;
}
}
this.outstanding--;
if ( this.outstanding === 0 ) {
// synchronously done
this.callback( this.kvs );
}
};
/**
* Callback for async argument value expansions
*/
AttributeTransformManager.prototype._returnAttributeValue = function ( ref, tokens ) {
this.manager.env.dp( 'check _returnAttributeValue: ', ref, tokens );
this.kvs[ref].v = tokens;
this.kvs[ref].v = this.manager.env.stripEOFTkfromTokens( this.kvs[ref].v );
this.outstanding--;
if ( this.outstanding === 0 ) {
this.callback( this.kvs );
}
};
/**
* Callback for async argument key expansions
*/
AttributeTransformManager.prototype._returnAttributeKey = function ( ref, tokens ) {
//console.warn( 'check _returnAttributeKey: ' + JSON.stringify( tokens ) );
this.kvs[ref].k = tokens;
this.kvs[ref].k = this.manager.env.stripEOFTkfromTokens( this.kvs[ref].k );
if ( this.kvs[ref].v === '' ) {
// FIXME: use tokenizer production to properly parse this
var m = this.manager.env.tokensToString( this.kvs[ref].k )
.match( /([^=]+)=['"]?([^'"]*)['"]?$/ );
if ( m ) {
this.kvs[ref].k = m[1];
this.kvs[ref].v = m[2];
//console.warn( m + JSON.stringify( this.kvs[ref] ) );
}
}
this.outstanding--;
if ( this.outstanding === 0 ) {
this.callback( this.kvs );
}
};
/******************************* TokenAccumulator *************************/
/**
* Token accumulators buffer tokens between asynchronous processing points,
* and return fully processed token chunks in-order and as soon as possible.
* They support the AsyncTokenTransformManager.
*
* @class
* @constructor
* @param {Object} next TokenAccumulator to link to
* @param {Array} (optional) tokens, init accumulator with tokens or []
*/
function TokenAccumulator ( manager, parentCB ) {
this.manager = manager;
this.parentCB = parentCB;
this.accum = [];
// Wait for child and sibling by default
// Note: Need to decrement outstanding on last accum
// in a chain.
this.outstanding = 2;
}
/**
* Curry a parentCB with the object and reference.
*
* @method
* @param {Object} TokenAccumulator
* @param {misc} Reference / key for callback
* @returns {Function}
*/
TokenAccumulator.prototype.getParentCB = function ( reference ) {
return this._returnTokens.bind( this, reference );
};
TokenAccumulator.prototype.setParentCB = function ( cb ) {
this.parentCB = cb;
};
/**
* Pass tokens to an accumulator
*
* @method
* @param {String}: reference, 'child' or 'sibling'.
* @param {Object}: { tokens, async }
* @returns {Mixed}: new parent callback for caller or falsy value
*/
TokenAccumulator.prototype._returnTokens = function ( reference, ret ) {
var cb,
returnTokens = [];
if ( ! ret.async ) {
this.outstanding--;
}
this.manager.env.dp( 'TokenAccumulator._returnTokens', reference, ret );
// FIXME
if ( ret.tokens === undefined ) {
if ( this.manager.env.debug ) {
console.warn( 'ret.tokens undefined: ' + JSON.stringify( ret ) );
console.trace();
}
if ( ret.token !== undefined ) {
ret.tokens = [ret.token];
} else {
ret.tokens = [];
}
}
if ( reference === 'child' ) {
var res = {};
if ( !ret.async ) {
// empty accum too
ret.tokens = ret.tokens.concat( this.accum );
this.accum = [];
}
//this.manager.env.dp( 'TokenAccumulator._returnTokens child: ',
// tokens, ' outstanding: ', this.outstanding );
ret.tokens.rank = this.manager.phaseEndRank;
ret.async = this.outstanding;
this._callParentCB( ret );
return null;
} else {
// sibling
if ( this.outstanding === 0 ) {
ret.tokens = this.accum.concat( ret.tokens );
// A sibling has already transformed child tokens, so we don't
// have to do this again.
//this.manager.env.dp( 'TokenAccumulator._returnTokens: ',
// 'sibling done and parentCB ',
// tokens );
ret.tokens.rank = this.manager.phaseEndRank;
ret.async = false;
this.parentCB( ret );
return null;
} else if ( this.outstanding === 1 && ret.async ) {
// Sibling is not yet done, but child is. Return own parentCB to
// allow the sibling to go direct, and call back parent with
// tokens. The internal accumulator is empty at this stage, as its
// tokens are passed to the parent when the child is done.
ret.tokens.rank = this.manager.phaseEndRank;
return this._callParentCB( ret );
} else {
// sibling tokens are always fully processed for this phase, so we
// can directly concatenate them here.
this.accum = this.accum.concat( ret.tokens );
this.manager.env.dp( 'TokenAccumulator._returnTokens: sibling done, but not overall. async=',
ret.async, ', this.outstanding=', this.outstanding,
', this.accum=', this.accum, ' frame.title=', this.manager.frame.title );
}
}
};
/**
* Mark the sibling as done (normally at the tail of a chain).
*/
TokenAccumulator.prototype.siblingDone = function () {
//console.warn( 'TokenAccumulator.siblingDone: ' );
this._returnTokens ( 'sibling', { tokens: [], async: false } );
};
TokenAccumulator.prototype._callParentCB = function ( ret ) {
var cb = this.parentCB( ret );
if ( cb ) {
this.parentCB = cb;
}
return this.parentCB;
};
/**
* Push a token into the accumulator
*
* @method
* @param {Object} token
*/
TokenAccumulator.prototype.push = function ( token ) {
return this.accum.push(token);
};
/**
* Append tokens to an accumulator
*
* @method
* @param {Object} token
*/
TokenAccumulator.prototype.append = function ( token ) {
this.accum = this.accum.concat( token );
};
/******************************* Frame ******************************/
/**
* The Frame object
*
* A frame represents a template expansion scope including parameters passed
* to the template (args). It provides a generic 'expand' method which
* expands / converts individual parameter values in its scope. It also
* provides methods to check if another expansion would lead to loops or
* exceed the maximum expansion depth.
*/
function Frame ( title, manager, args, parentFrame ) {
this.title = title;
this.manager = manager;
this.args = new Params( this.manager.env, args );
// cache key fragment for expansion cache
// FIXME: better use fully expand args for the cache key! Can avoid using
// the parent cache keys in that case.
var MD5 = new jshashes.MD5();
if ( args._cacheKey === undefined ) {
args._cacheKey = MD5.hex( JSON.stringify( args ) );
}
if ( parentFrame ) {
this.parentFrame = parentFrame;
this.depth = parentFrame.depth + 1;
this._cacheKey = MD5.hex( parentFrame._cacheKey + args._cacheKey );
} else {
this.parentFrame = null;
this.depth = 0;
this._cacheKey = args._cacheKey;
}
}
/**
* Create a new child frame
*/
Frame.prototype.newChild = function ( title, manager, args ) {
return new Frame( title, manager, args, this );
};
/**
* Expand / convert a thunk (a chunk of tokens not yet fully expanded).
*
* XXX: support different input formats, expansion phases / flags and more
* output formats.
*/
Frame.prototype.expand = function ( chunk, options ) {
var outType = options.type || 'text/x-mediawiki/expanded';
var cb = options.cb || console.warn( JSON.stringify( options ) );
this.manager.env.dp( 'Frame.expand', this._cacheKey, chunk );
if ( chunk.constructor === String ) {
// Plain text remains text. Nothing to do.
if ( outType !== 'text/x-mediawiki/expanded' ) {
return cb( [ chunk ] );
} else {
return cb( chunk );
}
} else if ( chunk.constructor === ParserValue ) {
// Delegate to ParserValue
return chunk.get( options );
}
// We are now dealing with an Array of tokens. See if the chunk is
// a source chunk with a cache attached.
var maybeCached;
if ( ! chunk.length ) {
// nothing to do, simulate a cache hit..
maybeCached = chunk;
} else if ( chunk.cache === undefined ) {
// add a cache to the chunk
Object.defineProperty( chunk, 'cache',
// XXX: play with cache size!
{ value: new ExpansionCache( 5 ), enumerable: false } );
} else {
// try to retrieve cached expansion
maybeCached = chunk.cache.get( this, options );
// XXX: disable caching of error messages!
}
if ( maybeCached ) {
this.manager.env.dp( 'got cache', this.title, this._cacheKey, maybeCached );
return cb( maybeCached );
}
// not cached, actually have to do some work.
if ( outType === 'text/x-mediawiki/expanded' ) {
// Simply wrap normal expansion ;)
// XXX: Integrate this into the pipeline setup?
outType = 'tokens/x-mediawiki/expanded';
var self = this,
origCB = cb;
cb = function( resChunk ) {
var res = self.manager.env.tokensToString( resChunk );
// cache the result
chunk.cache.set( self, options, res );
origCB( res );
};
}
// XXX: Should perhaps create a generic from..to conversion map in
// mediawiki.parser.js, at least for forward conversions.
if ( outType === 'tokens/x-mediawiki/expanded' ) {
if ( options.asyncCB ) {
// Signal (potentially) asynchronous expansion to parent.
options.asyncCB( );
}
var pipeline = this.manager.pipeFactory.getPipeline(
// XXX: use input type
this.manager.attributeType || 'tokens/x-mediawiki', true
);
pipeline.setFrame( this, null );
// In the name of interface simplicity, we accumulate all emitted
// chunks in a single accumulator.
var eventState = { cache: chunk.cache, options: options, accum: [], cb: cb };
pipeline.addListener( 'chunk',
this.onThunkEvent.bind( this, eventState, true ) );
pipeline.addListener( 'end',
this.onThunkEvent.bind( this, eventState, false ) );
if ( chunk[chunk.length - 1].constructor === EOFTk ) {
pipeline.process( chunk, this.title );
} else {
var newChunk = chunk.concat( this._eofTkList );
newChunk.rank = chunk.rank;
pipeline.process( newChunk, this.title );
}
} else {
throw "Frame.expand: Unsupported output type " + outType;
}
};
// constant chunk terminator
Frame.prototype._eofTkList = [ new EOFTk() ];
/**
* Event handler for chunk conversion pipelines
*/
Frame.prototype.onThunkEvent = function ( state, notYetDone, ret ) {
if ( notYetDone ) {
state.accum = state.accum.concat(this.manager.env.stripEOFTkfromTokens( ret ) );
this.manager.env.dp( 'Frame.onThunkEvent accum:', this._cacheKey, state.accum );
} else {
this.manager.env.dp( 'Frame.onThunkEvent:', this._cacheKey, state.accum );
state.cache.set( this, state.options, state.accum );
// Add cache to accum too
Object.defineProperty( state.accum, 'cache',
{ value: state.cache, enumerable: false } );
state.cb ( state.accum );
}
};
/**
* Check if expanding <title> would lead to a loop, or would exceed the
* maximum expansion depth.
*
* @method
* @param {String} Title to check.
*/
Frame.prototype.loopAndDepthCheck = function ( title, maxDepth ) {
// XXX: set limit really low for testing!
//console.warn( 'Loopcheck: ' + title + JSON.stringify( this, null, 2 ) );
if ( this.depth > maxDepth ) {
// too deep
//console.warn( 'Loopcheck: ' + JSON.stringify( this, null, 2 ) );
return 'Error: Expansion depth limit exceeded at ';
}
var elem = this;
do {
//console.warn( 'loop check: ' + title + ' vs ' + elem.title );
if ( elem.title === title ) {
// Loop detected
return 'Error: Expansion loop detected at ';
}
elem = elem.parentFrame;
} while ( elem );
// No loop detected.
return false;
};
/**
* ParserValue factory
*
* ParserValues wrap a piece of content that can be retrieved in different
* expansion stages and different content types using the get() method.
* Content types currently include 'tokens/x-mediawiki/expanded' for
* pre-processed tokens and 'text/x-mediawiki/expanded' for pre-processed
* wikitext.
*/
Frame.prototype.newParserValue = function ( source, options ) {
// TODO: support more options:
// options.type to specify source type
// options.phase to specify source expansion stage
if ( source.constructor === String ) {
source = new String( source );
source.get = this._getID;
return source;
} else if ( options && options.frame ) {
return new ParserValue( source, options.frame );
} else {
return new ParserValue( source, this );
}
};
Frame.prototype._getID = function( options ) {
if ( !options || !options.cb ) {
console.trace();
console.warn('Error in Frame._getID: no cb in options!');
} else {
//console.warn('getID: ' + options.cb);
return options.cb( this );
}
};
/**
* A specialized expansion cache, normally associated with a chunk of tokens.
*/
function ExpansionCache ( n ) {
this._cache = new LRU( n );
}
ExpansionCache.prototype.makeKey = function ( frame, options ) {
//console.warn( frame._cacheKey );
return frame._cacheKey + options.type ;
};
ExpansionCache.prototype.set = function ( frame, options, value ) {
//if ( frame.title !== null ) {
//console.log( 'setting cache for ' + frame.title +
// ' ' + this.makeKey( frame, options ) +
// ' to: ' + JSON.stringify( value ) );
return this._cache.set( this.makeKey( frame, options ), value );
//}
};
ExpansionCache.prototype.get = function ( frame, options ) {
return this._cache.get( this.makeKey( frame, options ) );
};
if (typeof module == "object") {
module.exports.AsyncTokenTransformManager = AsyncTokenTransformManager;
module.exports.SyncTokenTransformManager = SyncTokenTransformManager;
module.exports.AttributeTransformManager = AttributeTransformManager;
}