mediawiki-extensions-Visual.../modules/parser/ext.core.TemplateHandler.js
Gabriel Wicke 21286a50df Make sure pageName is set in the web service, and handle empty page name in parser function
Change-Id: I5d36eefecc2f35a860d00a8960004f8e651ed17c
2012-05-23 16:43:45 +02:00

484 lines
15 KiB
JavaScript

/**
* Template and template argument handling, first cut.
*
* AsyncTokenTransformManager objects provide preprocessor-frame-like
* functionality once template args etc are fully expanded, and isolate
* individual transforms from concurrency issues. Template expansion is
* controlled using a tplExpandData structure created independently for each
* handled template tag.
*
* @author Gabriel Wicke <gwicke@wikimedia.org>
* @author Brion Vibber <brion@wikimedia.org>
*/
var $ = require('jquery'),
request = require('request'),
events = require('events'),
qs = require('querystring'),
ParserFunctions = require('./ext.core.ParserFunctions.js').ParserFunctions,
AttributeTransformManager = require('./mediawiki.TokenTransformManager.js')
.AttributeTransformManager,
defines = require('./mediawiki.parser.defines.js');
function TemplateHandler ( manager ) {
this.register( manager );
this.parserFunctions = new ParserFunctions( manager );
}
// constants
TemplateHandler.prototype.rank = 1.1;
TemplateHandler.prototype.register = function ( manager ) {
this.manager = manager;
// Register for template and templatearg tag tokens
manager.addTransform( this.onTemplate.bind(this),
this.rank, 'tag', 'template' );
// Template argument expansion
manager.addTransform( this.onTemplateArg.bind(this),
this.rank, 'tag', 'templatearg' );
};
/**
* Main template token handler
*
* Expands target and arguments (both keys and values) and either directly
* calls or sets up the callback to _expandTemplate, which then fetches and
* processes the template.
*/
TemplateHandler.prototype.onTemplate = function ( token, frame, cb ) {
//console.warn('onTemplate! ' + JSON.stringify( token, null, 2 ) +
// ' args: ' + JSON.stringify( this.manager.args ));
// expand argument keys, with callback set to next processing step
// XXX: would likely be faster to do this in a tight loop here
var atm = new AttributeTransformManager(
this.manager,
this._expandTemplate.bind( this, token, frame, cb )
);
cb( { async: true } );
atm.processKeys( token.attribs );
};
/**
* Create positional (number) keys for arguments without explicit keys
*/
TemplateHandler.prototype._nameArgs = function ( attribs ) {
var n = 1,
out = [];
for ( var i = 0, l = attribs.length; i < l; i++ ) {
// FIXME: Also check for whitespace-only named args!
if ( ! attribs[i].k.length ) {
out.push( new KV( n.toString(), attribs[i].v ) );
n++;
} else {
out.push( attribs[i] );
}
}
this.manager.env.dp( '_nameArgs: ', out );
return out;
};
/**
* Fetch, tokenize and token-transform a template after all arguments and the
* target were expanded.
*/
TemplateHandler.prototype._expandTemplate = function ( token, frame, cb, attribs ) {
//console.warn('TemplateHandler.expandTemplate: ' +
// JSON.stringify( tplExpandData, null, 2 ) );
var target = attribs[0].k;
if ( ! target ) {
this.manager.env.ap( 'No target! ', attribs );
console.trace();
}
// TODO:
// check for 'subst:'
// check for variable magic names
// check for msg, msgnw, raw magics
// check for parser functions
// First, check the target for loops
target = this.manager.env.tokensToString( target ).trim();
//var args = this.manager.env.KVtoHash( tplExpandData.expandedArgs );
// strip subst for now.
target = target.replace( /^(safe)?subst:/, '' );
// XXX: wrap attribs in object with .dict() and .named() methods,
// and each member (key/value) into object with .tokens(), .dom() and
// .wikitext() methods (subclass of Array)
var prefix = target.split(':', 1)[0].toLowerCase().trim();
if ( prefix && 'pf_' + prefix in this.parserFunctions ) {
var pfAttribs = new Params( this.manager.env, attribs );
pfAttribs[0] = new KV( target.substr( prefix.length + 1 ), [] );
//this.manager.env.dp( 'func prefix/args: ', prefix,
// tplExpandData.expandedArgs,
// 'unnamedArgs', tplExpandData.origToken.attribs,
// 'funcArg:', funcArg
// );
this.manager.env.dp( 'entering prefix', target, token );
this.parserFunctions[ 'pf_' + prefix ]
( token, this.manager.frame, cb, pfAttribs );
return;
}
this.manager.env.tp( 'template target: ' + target );
// now normalize the target before template processing
target = this.manager.env.normalizeTitle( target );
// Resolve a possibly relative link
var templateName = this.manager.env.resolveTitle(
target,
'Template'
);
var checkRes = this.manager.frame.loopAndDepthCheck( templateName, this.manager.env.maxDepth );
if( checkRes ) {
// Loop detected or depth limit exceeded, abort!
res = [
checkRes,
new TagTk( 'a', [{k: 'href', v: target}] ),
templateName,
new EndTagTk( 'a' )
];
res.rank = this.manager.phaseEndRank;
cb( { tokens: res } );
return;
}
// XXX: notes from brion's mediawiki.parser.environment
// resolve template name
// load template w/ canonical name
// load template w/ variant names (language variants)
// For now, just fetch the template and pass the callback for further
// processing along.
this._fetchTemplateAndTitle(
templateName,
cb,
this._processTemplateAndTitle.bind( this, token, frame, cb, templateName, attribs )
);
};
/**
* Process a fetched template source
*/
TemplateHandler.prototype._processTemplateAndTitle = function( token, frame, cb, name, attribs, src, type ) {
// Get a nested transformation pipeline for the input type. The input
// pipeline includes the tokenizer, synchronous stage-1 transforms for
// 'text/wiki' input and asynchronous stage-2 transforms).
var pipeline = this.manager.pipeFactory.getPipeline(
type || 'text/x-mediawiki', true
);
pipeline.setFrame( this.manager.frame, name, attribs );
// Hook up the inputPipeline output events to our handlers
pipeline.addListener( 'chunk', this._onChunk.bind ( this, cb ) );
pipeline.addListener( 'end', this._onEnd.bind ( this, cb ) );
// Feed the pipeline. XXX: Support different formats.
this.manager.env.dp( 'TemplateHandler._processTemplateAndTitle', name, attribs );
pipeline.process ( src, name );
};
/**
* Handle chunk emitted from the input pipeline after feeding it a template
*/
TemplateHandler.prototype._onChunk = function( cb, chunk ) {
// We encapsulate the output by default, so collect tokens here.
chunk = this.manager.env.stripEOFTkfromTokens( chunk );
this.manager.env.dp( 'TemplateHandler._onChunk', chunk );
cb( { tokens: chunk, async: true } );
};
/**
* Handle the end event emitted by the parser pipeline after fully processing
* the template source.
*/
TemplateHandler.prototype._onEnd = function( cb ) {
this.manager.env.dp( 'TemplateHandler._onEnd' );
cb( { tokens: [] } );
};
/**
* Fetch a template
*/
TemplateHandler.prototype._fetchTemplateAndTitle = function ( title, parentCB, cb ) {
// @fixme normalize name?
var self = this;
if ( title in this.manager.env.pageCache ) {
// XXX: store type too (and cache tokens/x-mediawiki)
cb( self.manager.env.pageCache[title] /* , type */ );
} else if ( ! this.manager.env.fetchTemplates ) {
parentCB( { tokens: [ 'Warning: Page/template fetching disabled, and no cache for ' +
title ] } );
} else {
// We are about to start an async request for a template
this.manager.env.dp( 'Note: trying to fetch ', title );
// Start a new request if none is outstanding
//this.manager.env.dp( 'requestQueue: ', this.manager.env.requestQueue );
if ( this.manager.env.requestQueue[title] === undefined ) {
this.manager.env.tp( 'Note: Starting new request for ' + title );
this.manager.env.requestQueue[title] = new TemplateRequest( this.manager, title );
}
// Append a listener to the request at the toplevel, but prepend at
// lower levels to enforce depth-first processing
if ( false && this.manager.isInclude ) {
// prepend request: deal with requests from includes first
this.manager.env.requestQueue[title]
.listeners( 'src' ).unshift( cb );
} else {
// append request, process in document order
this.manager.env.requestQueue[title]
.listeners( 'src' ).push( cb );
}
parentCB ( { async: true } );
}
};
/*********************** Template argument expansion *******************/
/**
* Expand template arguments with tokens from the containing frame.
*/
TemplateHandler.prototype.onTemplateArg = function ( token, frame, cb ) {
new AttributeTransformManager (
this.manager,
this._returnArgAttributes.bind( this, token, cb, frame )
).process( token.attribs.slice() );
};
TemplateHandler.prototype._returnArgAttributes = function ( token, cb, frame, attributes ) {
//console.warn( '_returnArgAttributes: ' + JSON.stringify( attributes ));
var argName = this.manager.env.tokensToString( attributes[0].k ).trim(),
res,
dict = this.manager.frame.args.named();
this.manager.env.dp( 'args', argName /*, dict*/ );
if ( argName in dict ) {
// return tokens for argument
//console.warn( 'templateArg found: ' + argName +
// ' vs. ' + JSON.stringify( this.manager.args ) );
res = dict[argName];
this.manager.env.dp( 'arg res:', res );
if ( res.constructor === String ) {
cb( { tokens: [res] } );
} else {
dict[argName].get({
type: 'tokens/x-mediawiki/expanded',
cb: function( res ) { cb ( { tokens: res } ); },
asyncCB: cb
});
}
return;
} else {
this.manager.env.dp( 'templateArg not found: ', argName
/*' vs. ', dict */ );
if ( attributes.length > 1 ) {
res = attributes[1].v;
} else {
//console.warn('no default for ' + argName + JSON.stringify( attributes ));
res = [ '{{{' + argName + '}}}' ];
}
}
cb( { tokens: res } );
};
/***************** Template fetch request helper class ********/
function TemplateRequest ( manager, title ) {
// Increase the number of maximum listeners a bit..
this.setMaxListeners( 50000 );
this.retries = 5;
this.manager = manager;
this.title = title;
var url = manager.env.wgScript + '/api' +
manager.env.wgScriptExtension +
'?' +
qs.stringify( {
format: 'json',
action: 'query',
prop: 'revisions',
rvprop: 'content',
titles: title
} );
//'?format=json&action=query&prop=revisions&rvprop=content&titles=' + title;
this.requestOptions = {
method: 'GET',
followRedirect: true,
url: url,
headers: {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:9.0.1) ' +
'Gecko/20100101 Firefox/9.0.1 Iceweasel/9.0.1'
}
};
// Start the request
request( this.requestOptions, this._handler.bind(this) );
}
// Inherit from EventEmitter
TemplateRequest.prototype = new events.EventEmitter();
TemplateRequest.prototype.constructor = TemplateRequest;
TemplateRequest.prototype._handler = function (error, response, body) {
//console.warn( 'response for ' + title + ' :' + body + ':' );
if(error) {
this.manager.env.dp(error);
if ( this.retries ) {
this.retries--;
this.manager.env.tp( 'Retrying template request for ' + this.title );
var self = this;
// retry
request( this.requestOptions, this._handler.bind(this) );
} else {
this.emit('src', 'Page/template fetch failure for title ' + this.title,
'text/x-mediawiki');
}
} else if(response.statusCode == 200) {
var src = '',
data,
normalizedTitle;
try {
//console.warn( 'body: ' + body );
data = JSON.parse( body );
} catch(e) {
console.warn( "Error: while parsing result. Error was: " );
console.warn( e );
console.warn( "Response that didn't parse was:");
console.warn( "------------------------------------------\n" + body );
console.warn( "------------------------------------------" );
}
try {
$.each( data.query.pages, function(i, page) {
if (page.revisions && page.revisions.length) {
src = page.revisions[0]['*'];
normalizeTitle = page.title;
}
});
} catch ( e2 ) {
console.warn( 'Did not find page revisions in the returned body:' + body );
src = '';
}
// check for #REDIRECT
var redirMatch = src.match( /[\r\n\s]*#\s*redirect\s\[\[([^\]]+)\]\]/i )
if ( redirMatch ) {
var title = redirMatch[1];
var url = this.manager.env.wgScript + '/api' +
this.manager.env.wgScriptExtension +
'?' +
qs.stringify( {
format: 'json',
action: 'query',
prop: 'revisions',
rvprop: 'content',
titles: title
} );
//'?format=json&action=query&prop=revisions&rvprop=content&titles=' + title;
this.requestOptions.url = url;
request( this.requestOptions, this._handler.bind(this) );
return;
}
//console.warn( 'Page ' + title + ': got ' + src );
this.manager.env.tp( 'Retrieved ' + this.title, src );
// Add the source to the cache
this.manager.env.pageCache[this.title] = src;
// Process only a few callbacks in each event loop iteration to
// reduce memory usage.
//
//
var listeners = this.listeners( 'src' );
var processSome = function () {
// XXX: experiment a bit with the number of callbacks per
// iteration!
var maxIters = Math.min(1, listeners.length);
for ( var it = 0; it < maxIters; it++ ) {
var nextListener = listeners.shift();
// We only retrieve text/x-mediawiki source currently.
nextListener( src, 'text/x-mediawiki' );
}
if ( listeners.length ) {
process.nextTick( processSome );
}
};
process.nextTick( processSome );
//processSome();
//self.emit( 'src', src, title );
}
// XXX: handle other status codes
// Remove self from request queue
//this.manager.env.dp( 'trying to remove ', this.title, ' from requestQueue' );
delete this.manager.env.requestQueue[this.title];
//this.manager.env.dp( 'after deletion:', this.manager.env.requestQueue );
};
/*
* XXX: The jQuery version does not quite work with node, but we keep
* it around for now.
$.ajax({
url: url,
data: {
format: 'json',
action: 'query',
prop: 'revisions',
rvprop: 'content',
titles: title
},
success: function(data, statusString, xhr) {
console.warn( 'Page ' + title + ' success ' + JSON.stringify( data ) );
var src = null, title = null;
$.each(data.query.pages, function(i, page) {
if (page.revisions && page.revisions.length) {
src = page.revisions[0]['*'];
title = page.title;
}
});
if (typeof src !== 'string') {
console.warn( 'Page ' + title + 'not found! Got ' + src );
callback( 'Page ' + title + ' not found' );
} else {
// Add to cache
console.warn( 'Page ' + title + ': got ' + src );
this.manager.env.pageCache[title] = src;
callback(src, title);
}
},
error: function(xhr, msg, err) {
console.warn( 'Page/template fetch failure for title ' +
title + ', url=' + url + JSON.stringify(xhr) + ', err=' + err );
callback('Page/template fetch failure for title ' + title);
},
dataType: 'json',
cache: false, // @fixme caching, versions etc?
crossDomain: true
});
*/
if (typeof module == "object") {
module.exports.TemplateHandler = TemplateHandler;
}