More work towards template expansion.

* Created AttributeTransformManager for generic attribute conversion, and
  removed the title and template argument { key, value } expansion from
  TemplateHandler.
* Added caching for attribute and input sub-pipelines. Attribute pipelines in
  particular would otherwise be recreated for each attribute key and value.
This commit is contained in:
Gabriel Wicke 2012-01-11 00:05:51 +00:00
parent 7333f606b1
commit 6b6ec2933d
Notes: Gabriel Wicke 2012-02-27 16:40:01 +00:00
3 changed files with 226 additions and 124 deletions

View file

@ -46,121 +46,57 @@ TemplateHandler.prototype.register = function ( manager ) {
*/
TemplateHandler.prototype.onTemplate = function ( token, cb ) {
this.parentCB = cb;
this.origToken = token;
// check for 'subst:'
// check for variable magic names
// check for msg, msgnw, raw magics
// check for parser functions
// create a new temporary frame for argument and title expansions
var templateExpandData = {
var templateTokenTransformData = {
args: {},
env: frame.env,
outstanding: 0,
cb: cb,
origToken: token
origToken: token,
isAsync: false
},
transformCB,
i = 0,
kvs = [],
res,
kv;
// XXX: transform the target
transformCB = this._returnArgValue.bind( this, { frame: templateExpandData } );
res = this.manager.transformTokens( token.target, transformCB );
if ( res.async ) {
newFrame.outstanding++;
}
newFrame.target = res.tokens;
// transform each argument (key and value), and handle asynchronous returns
for ( var key in token.args ) {
if ( token.hasOwnProperty( key ) ) {
kv = { key: [], value: [] };
// transform the value
transformCB = this._returnArgValue.bind( this, { frame: templateExpandData, index: i } );
res = this.manager.transformTokens( args[key], transformCB );
if ( res.async ) {
newFrame.outstanding++;
}
kv.value = res.tokens;
var attributes = [['', token.target]].concat( token.args );
// XXX: transform key too, and store it in the token's value for
// the original key
// For now, we assume the key to be a string.
kv.key = key;
new AttributeTransformer( this.manager,
this._returnAttributes.bind( this, templateTokenTransformData )
).process( attributes );
// finally, append to kvs
kvs.push( kv );
i++;
}
}
// Move the above to AttributeTransformer class
if ( newFrame.outstanding === 0 ) {
return this._expandTemplate ( newFrame );
if ( templateExpandData.outstanding === 0 ) {
return this._expandTemplate ( templateTokenTransformData );
} else {
templateTokenTransformData.isAsync = true;
return { async: true };
}
};
/**
* Callback for async argument value expansions
*/
TemplateHandler.prototype._returnArgValue = function ( ref, tokens, notYetDone ) {
var frame = ref.frame;
frame.args[ref.index].value.push( tokens );
if ( ! notYetDone ) {
frame.outstanding--;
if ( frame.outstanding === 0 ) {
// this calls back to frame.cb, so no return here.
this._expandTemplate( frame );
}
TemplateHandler.prototype._returnAttributes = function ( templateTokenTransformData, attributes ) {
// Remove the target from the attributes
templateTokenTransformData.target = attributes[0][1];
attributes.shift();
templateTokenTransformData.expandedArgs = attributes;
if ( templateTokenTransformData.isAsync ) {
this._expandTemplate ( templateTokenTransformData );
}
};
/**
 * Receive a chunk of an asynchronously expanded argument key.
 *
 * The chunk is appended to the key accumulator of the argument at ref.index
 * in ref.frame. Once the expansion reports completion, the frame's
 * outstanding counter is decremented, and the template is expanded as soon
 * as that counter reaches zero.
 *
 * @param {Object} ref Reference with members 'frame' and 'index'.
 * @param {Array} tokens The chunk to append.
 * @param {Boolean} notYetDone True while more chunks will follow.
 */
TemplateHandler.prototype._returnArgKey = function ( ref, tokens, notYetDone ) {
	var pending = ref.frame,
		keyChunks = pending.args[ref.index].key;

	keyChunks.push( tokens );

	if ( notYetDone ) {
		// more chunks to come for this key
		return;
	}

	pending.outstanding--;
	if ( pending.outstanding !== 0 ) {
		// other expansions are still in flight
		return;
	}

	// The result is delivered by _expandTemplate, so nothing is returned.
	this._expandTemplate( pending );
};
/**
 * Receive a chunk of the asynchronously expanded template target.
 *
 * The chunk is appended to ref.frame.target. Once the expansion reports
 * completion, the frame's outstanding counter is decremented, and the
 * template is expanded as soon as that counter reaches zero.
 *
 * @param {Object} ref Reference with member 'frame'.
 * @param {Array} tokens The chunk to append.
 * @param {Boolean} notYetDone True while more chunks will follow.
 */
TemplateHandler.prototype._returnTarget = function ( ref, tokens, notYetDone ) {
	var pending = ref.frame;

	pending.target.push( tokens );

	if ( notYetDone ) {
		// more chunks to come for the target
		return;
	}

	pending.outstanding--;
	if ( pending.outstanding !== 0 ) {
		// other expansions are still in flight
		return;
	}

	// The result is delivered by _expandTemplate, so nothing is returned.
	this._expandTemplate( pending );
};
}
/**
* Fetch, tokenize and token-transform a template after all arguments and the
* target were expanded in frame.
*/
TemplateHandler.prototype._expandTemplate = function ( frame ) {
TemplateHandler.prototype._expandTemplate = function ( templateTokenTransformData ) {
// First, check the target for loops
this.manager.loopCheck.check( frame.target );
this.manager.loopCheck.check( templateTokenTransformData.target );
// Create a new nested transformation pipeline for the input type
// (includes the tokenizer and synchronous stage-1 transforms for
@ -198,8 +134,6 @@ TemplateHandler.prototype._expandTemplate = function ( frame ) {
// recursion depth check
// fetch from DB or interwiki
// infinite loop check
//
// TODO: template fetching is already implemented there, copy this over!
};

View file

@ -348,7 +348,7 @@ AsyncTokenTransformManager.prototype.newChildPipeline = function ( inputType, ar
* @returns {Object} Pipeline, which is an object with 'first' pointing to the
* first stage of the pipeline, and 'last' pointing to the last stage.
*/
AsyncTokenTransformManager.prototype.getAttributePipeline = function ( inputType, args ) {
	// Delegate to the 'attributes' factory this manager was configured with.
	return this.childFactories.attributes( inputType, args );
};

// Keep the previous method name working for callers that still use it.
AsyncTokenTransformManager.prototype.newAttributePipeline =
	AsyncTokenTransformManager.prototype.getAttributePipeline;
@ -641,7 +641,8 @@ SyncTokenTransformManager.prototype.onEndEvent = function () {
* transformation pipeline (normally phase1 SyncTokenTransformManager and
* phase2 AsyncTokenTransformManager). This pipeline needs to be independent
* of the containing TokenTransformManager to isolate transforms from each
* other.
* other. The AttributeTransformManager returns its result by calling the
* supplied callback.
*
* @class
* @constructor
@ -649,26 +650,110 @@ SyncTokenTransformManager.prototype.onEndEvent = function () {
*/
function AttributeTransformManager ( manager, callback ) {
	// The manager is needed later to obtain one pipeline per key and value.
	this.manager = manager;
	// Called once with [[key, value], ...] after every expansion has ended.
	this.callback = callback;
	// Number of key / value expansions that have not signalled 'end' yet.
	this.outstanding = 0;
	// One { key: [], value: [] } accumulator per attribute, in input order.
	this.kvs = [];
}

/**
 * Expand the key and the value of each attribute through a pipeline of its
 * own, and hand the expanded attributes to the callback passed to the
 * constructor once every expansion has ended.
 *
 * @param {Array} attributes List of [key, value] pairs.
 */
AttributeTransformManager.prototype.transformAttributes = function ( attributes ) {
	var l = attributes.length,
		refs = [],
		i;

	if ( l === 0 ) {
		// Nothing to expand: no pipeline will ever signal 'end', so report
		// right away unless earlier expansions are still in flight.
		if ( this.outstanding === 0 ) {
			this._returnAttributes();
		}
		return;
	}

	// Register all accumulators and account for all expansions *before*
	// starting any pipeline. A pipeline that finishes synchronously would
	// otherwise drive the counter to zero while later attributes have not
	// been started yet.
	for ( i = 0; i < l; i++ ) {
		refs.push( { index: this.kvs.push( { key: [], value: [] } ) - 1 } );
	}
	this.outstanding += 2 * l;

	for ( i = 0; i < l; i++ ) {
		this._transformPart( attributes[i][0], this._returnAttributeKey, refs[i] );
		this._transformPart( attributes[i][1], this._returnAttributeValue, refs[i] );
	}
};

/**
 * Run a single key or value through an attribute pipeline of its own.
 *
 * @private
 * @param {Mixed} tokens The key or value to expand.
 * @param {Function} returnCB _returnAttributeKey or _returnAttributeValue.
 * @param {Object} ref Reference with member 'index' into this.kvs.
 */
AttributeTransformManager.prototype._transformPart = function ( tokens, returnCB, ref ) {
	var pipe = this.manager.getAttributePipeline( this.manager.args );
	// NOTE(review): the listeners are attached to the last stage directly.
	// If the factory hands out recycled pipelines, nothing here removes
	// them again after 'end' -- confirm against the pipeline factory.
	pipe.last.addListener( 'chunk', this.onChunk.bind( this, returnCB, ref ) );
	pipe.last.addListener( 'end', this.onEnd.bind( this, returnCB, ref ) );
	// Assumes the pipeline object exposes process() -- TODO confirm.
	pipe.process( tokens );
};

/**
 * Convert the accumulated attributes to a list of [key, value] pairs and
 * pass that list to the callback.
 */
AttributeTransformManager.prototype._returnAttributes = function ( ) {
	var out = [];
	for ( var i = 0, l = this.kvs.length; i < l; i++ ) {
		out.push( [ this.kvs[i].key, this.kvs[i].value ] );
	}
	this.callback( out );
};

/**
 * Collect chunks returned from a pipeline.
 *
 * @param {Function} callback _returnAttributeKey or _returnAttributeValue.
 * @param {Object} ref Reference identifying the attribute.
 * @param {Array} chunk The chunk emitted by the pipeline.
 */
AttributeTransformManager.prototype.onChunk = function ( callback, ref, chunk ) {
	callback.call( this, ref, chunk, true );
};

/**
 * Mark one key or value expansion as finished.
 *
 * @param {Function} callback _returnAttributeKey or _returnAttributeValue.
 * @param {Object} ref Reference identifying the attribute.
 */
AttributeTransformManager.prototype.onEnd = function ( callback, ref ) {
	callback.call( this, ref, [], false );
};

/**
 * Append tokens to the key or value accumulator of an attribute, and report
 * all attributes once the last outstanding expansion has ended.
 *
 * @private
 * @param {String} part Either 'key' or 'value'.
 * @param {Object} ref Reference with member 'index' into this.kvs.
 * @param {Array} tokens Tokens to append.
 * @param {Boolean} notYetDone True while more chunks will follow.
 */
AttributeTransformManager.prototype._returnAttributePart = function ( part, ref, tokens, notYetDone ) {
	var kv = this.kvs[ref.index];
	// Concatenate, so that the result is a flat token list and the empty
	// chunk passed by onEnd adds nothing.
	kv[part] = kv[part].concat( tokens );
	if ( ! notYetDone ) {
		this.outstanding--;
		if ( this.outstanding === 0 ) {
			this._returnAttributes();
		}
	}
};

/**
 * Callback for async argument value expansions
 */
AttributeTransformManager.prototype._returnAttributeValue = function ( ref, tokens, notYetDone ) {
	this._returnAttributePart( 'value', ref, tokens, notYetDone );
};

/**
 * Callback for async argument key expansions
 */
AttributeTransformManager.prototype._returnAttributeKey = function ( ref, tokens, notYetDone ) {
	this._returnAttributePart( 'key', ref, tokens, notYetDone );
};
/******************************* TokenAccumulator *************************/

View file

@ -9,6 +9,7 @@
// make this global for now
// XXX: figure out a way to get away without a global for PEG actions!
$ = require('jquery');
var events = require( 'events' );
var fs = require('fs'),
path = require('path'),
@ -47,6 +48,14 @@ function ParserPipeline( env, inputType ) {
this.env = env;
}
// set up a sub-pipeline cache
this.pipelineCache = {
'text/wiki': {
'input': [],
'attribute': []
}
};
// Create an input pipeline for the given input type.
this.inputPipeline = this.makeInputPipeline ( inputType );
@ -95,6 +104,8 @@ function ParserPipeline( env, inputType ) {
// Lame hack for now, see above for an idea for the external async
// interface and pipeline setup
this.postProcessor.addListener( 'document', this.setDocumentProperty.bind( this ) );
}
/**
@ -114,28 +125,37 @@ function ParserPipeline( env, inputType ) {
/**
 * Factory for the input part of the pipeline: tokenizer, synchronous token
 * transforms and asynchronous token transforms for the given input type.
 *
 * A pipeline waiting in the 'input' cache is reused if there is one;
 * otherwise a new pipeline is built and wrapped so that it is offered back
 * to that cache through cachePipeline.
 *
 * NOTE(review): a reused pipeline was built with the args of an earlier
 * call; the args passed here are not applied to it -- confirm that this is
 * intended.
 *
 * @param {String} inputType Only 'text/wiki' is supported.
 * @param {Mixed} args Passed on to the AsyncTokenTransformManager.
 * @returns {Object} Pipeline with members 'first' and 'last'.
 * @throws {String} For unsupported input types.
 */
ParserPipeline.prototype.makeInputPipeline = function ( inputType, args ) {
	switch ( inputType ) {
		case 'text/wiki':
			// Check and pop the same ('input') cache. Attribute pipelines
			// have no tokenizer and must not be handed out here.
			var cache = this.pipelineCache['text/wiki'].input;
			if ( cache.length ) {
				return cache.pop();
			}

			var wikiTokenizer = new PegTokenizer();

			/**
			 * Token stream transformations.
			 * This is where all the wiki-specific functionality is implemented.
			 * See https://www.mediawiki.org/wiki/Future/Parser_development/Token_stream_transformations
			 */
			// XXX: Use this.env.config.transforms['inputType'][stage] or
			// somesuch to set up the transforms by input type
			var tokenPreProcessor = new TokenTransformManager.SyncTokenTransformManager ( this.env );
			tokenPreProcessor.listenForTokensFrom ( wikiTokenizer );

			var tokenExpander = new TokenTransformManager.AsyncTokenTransformManager (
					{
						'input': this.makeInputPipeline.bind( this ),
						'attributes': this.makeAttributePipeline.bind( this )
					},
					args, this.env
				);
			tokenExpander.listenForTokensFrom ( tokenPreProcessor );

			return new CachedTokenPipeline(
					this.cachePipeline.bind( this, 'text/wiki', 'input' ),
					wikiTokenizer,
					tokenExpander
				);

		default:
			throw "ParserPipeline.makeInputPipeline: Unsupported input type " + inputType;
	}
};
/**
 * Factory for attribute transformations, with input type implicit in the
 * environment.
 *
 * A pipeline waiting in the 'attribute' cache is reused if there is one.
 * Otherwise a new pipeline (synchronous followed by asynchronous token
 * transforms, without a tokenizer) is built and wrapped so that it is
 * offered back to that cache through cachePipeline.
 *
 * NOTE(review): a reused pipeline was built with the args of an earlier
 * call; the args passed here are not applied to it -- confirm that this is
 * intended.
 *
 * @param {Mixed} args Passed on to the AsyncTokenTransformManager.
 * @returns {Object} Pipeline with members 'first' and 'last'.
 */
ParserPipeline.prototype.makeAttributePipeline = function ( args ) {
	// Consult the cache first, so that nothing is built on a cache hit.
	var cache = this.pipelineCache['text/wiki'].attribute;
	if ( cache.length ) {
		return cache.pop();
	}

	/**
	 * Token stream transformations.
	 * This is where all the wiki-specific functionality is implemented.
	 * See https://www.mediawiki.org/wiki/Future/Parser_development/Token_stream_transformations
	 */
	var tokenPreProcessor = new TokenTransformManager.SyncTokenTransformManager ( this.env );
	var tokenExpander = new TokenTransformManager.AsyncTokenTransformManager (
			// Same factory map as used for input pipelines, so that nested
			// input and attribute pipelines can both be created.
			{
				'input': this.makeInputPipeline.bind( this ),
				'attributes': this.makeAttributePipeline.bind( this )
			},
			args, this.env
		);
	tokenExpander.listenForTokensFrom ( tokenPreProcessor );

	return new CachedTokenPipeline(
			this.cachePipeline.bind( this, 'text/wiki', 'attribute' ),
			tokenPreProcessor,
			tokenExpander
		);
};
/**
 * Offer a pipeline back to the sub-pipeline cache for later reuse.
 *
 * The pipeline is dropped silently if there is no cache for the given input
 * type and pipeline part, or if that cache already holds 50 pipelines.
 *
 * @param {String} inputType Input type the pipeline was built for.
 * @param {String} pipelinePart Name of the cache within that input type.
 * @param {Object} pipe The pipeline to cache.
 */
ParserPipeline.prototype.cachePipeline = function ( inputType, pipelinePart, pipe ) {
	// Guard both lookup levels: an input type without a cache entry must
	// not throw, just like an unknown pipeline part does not.
	var typeCache = this.pipelineCache[inputType],
		cache = typeCache && typeCache[pipelinePart];
	if ( cache && cache.length < 50 ) {
		cache.push( pipe );
	}
};
/**
* Feed the parser pipeline with some input, the output is emitted in events.
@ -195,6 +232,52 @@ ParserPipeline.prototype.getWikiDom = function () {
};
/************************ CachedTokenPipeline ********************************/
/**
 * Manage a part of a pipeline, that emits 'end' and 'chunk' events from its
 * last stage.
 *
 * The 'chunk' and 'end' events of the last stage are re-emitted on this
 * object. After an 'end' event was forwarded, all 'end' and 'chunk'
 * listeners registered on this object are removed and the object is handed
 * to returnToCacheCB.
 *
 * @class
 * @constructor
 * @param {Function} returnToCacheCB Called with this pipeline after 'end'.
 * @param {Object} first The first stage of the pipeline part.
 * @param {EventEmitter} last The last stage, emitting 'chunk' and 'end'.
 */
function CachedTokenPipeline ( returnToCacheCB, first, last ) {
	// Give each instance its own listener table. Without this call the
	// table is looked up on the prototype and shared between all instances.
	events.EventEmitter.call( this );

	this.returnToCacheCB = returnToCacheCB;
	this.first = first;
	this.last = last;
	this.last.addListener( 'end', this.forwardEndAndRecycleSelf.bind( this ) );
	this.last.addListener( 'chunk', this.forwardChunk.bind( this ) );
}

// Inherit from EventEmitter, without creating an emitter instance (and thus
// emitter state) on the prototype.
CachedTokenPipeline.prototype = Object.create( events.EventEmitter.prototype );
CachedTokenPipeline.prototype.constructor = CachedTokenPipeline;

/**
 * Forward chunks to our listeners
 *
 * @param {Mixed} chunk The chunk emitted by the last stage.
 */
CachedTokenPipeline.prototype.forwardChunk = function ( chunk ) {
	this.emit( 'chunk', chunk );
};

/**
 * End event consumer and emitter, that forwards the event, then removes all
 * 'end' and 'chunk' listeners from this pipeline and returns it to a cache.
 */
CachedTokenPipeline.prototype.forwardEndAndRecycleSelf = function ( ) {
	// first, forward the event
	this.emit( 'end' );
	// now recycle self
	this.removeAllListeners( 'end' );
	this.removeAllListeners( 'chunk' );
	this.returnToCacheCB ( this );
};
if (typeof module == "object") {
module.exports.ParserPipeline = ParserPipeline;
}