mediawiki-extensions-Visual.../modules/parser/ext.core.TemplateHandler.js

490 lines
15 KiB
JavaScript
Raw Normal View History

/**
* Template and template argument handling, first cut.
*
* AsyncTokenTransformManager objects provide preprocessor-frame-like
* functionality once template args etc are fully expanded, and isolate
2012-01-20 00:49:27 +00:00
* individual transforms from concurrency issues. Template expansion is
* controlled using a tplExpandData structure created independently for each
* handled template tag.
*
* @author Gabriel Wicke <gwicke@wikimedia.org>
* @author Brion Vibber <brion@wikimedia.org>
*/
var $ = require('jquery'),
request = require('request'),
events = require('events'),
qs = require('querystring'),
ParserFunctions = require('./ext.core.ParserFunctions.js').ParserFunctions,
AttributeTransformManager = require('./mediawiki.TokenTransformManager.js')
.AttributeTransformManager,
defines = require('./mediawiki.parser.defines.js');
function TemplateHandler ( manager ) {
this.register( manager );
this.parserFunctions = new ParserFunctions( manager );
}
// constants
TemplateHandler.prototype.rank = 1.1;
TemplateHandler.prototype.register = function ( manager ) {
this.manager = manager;
// Register for template and templatearg tag tokens
manager.addTransform( this.onTemplate.bind(this),
this.rank, 'tag', 'template' );
// Template argument expansion
manager.addTransform( this.onTemplateArg.bind(this),
this.rank, 'tag', 'templatearg' );
};
/**
* Main template token handler
*
* Expands target and arguments (both keys and values) and either directly
* calls or sets up the callback to _expandTemplate, which then fetches and
* processes the template.
*/
2012-02-07 11:53:29 +00:00
TemplateHandler.prototype.onTemplate = function ( token, frame, cb ) {
//console.warn('onTemplate! ' + JSON.stringify( token, null, 2 ) +
// ' args: ' + JSON.stringify( this.manager.args ));
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
// expand argument keys, with callback set to next processing step
// XXX: would likely be faster to do this in a tight loop here
var atm = new AttributeTransformManager(
this.manager,
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
this._expandTemplate.bind( this, token, frame, cb )
);
cb( { async: true } );
atm.processKeys( token.attribs );
};
/**
* Create positional (number) keys for arguments without explicit keys
*/
TemplateHandler.prototype._nameArgs = function ( attribs ) {
var n = 1,
out = [];
for ( var i = 0, l = attribs.length; i < l; i++ ) {
// FIXME: Also check for whitespace-only named args!
if ( ! attribs[i].k.length ) {
Big token transform framework overhaul part 2 * Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
2012-05-10 08:04:24 +00:00
out.push( new KV( n.toString(), attribs[i].v ) );
n++;
} else {
out.push( attribs[i] );
}
}
this.manager.env.dp( '_nameArgs: ', out );
return out;
};
/**
* Fetch, tokenize and token-transform a template after all arguments and the
* target were expanded.
*/
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
TemplateHandler.prototype._expandTemplate = function ( token, frame, cb, attribs ) {
//console.warn('TemplateHandler.expandTemplate: ' +
2012-01-20 00:49:27 +00:00
// JSON.stringify( tplExpandData, null, 2 ) );
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
var target = attribs[0].k;
2012-01-20 00:49:27 +00:00
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
if ( ! target ) {
this.manager.env.ap( 'No target! ', attribs );
console.trace();
}
2012-01-20 00:49:27 +00:00
// TODO:
// check for 'subst:'
// check for variable magic names
// check for msg, msgnw, raw magics
// check for parser functions
// First, check the target for loops
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
target = this.manager.env.tokensToString( target ).trim();
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
//var args = this.manager.env.KVtoHash( tplExpandData.expandedArgs );
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
// strip subst for now.
target = target.replace( /^(safe)?subst:/, '' );
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
// XXX: wrap attribs in object with .dict() and .named() methods,
// and each member (key/value) into object with .tokens(), .dom() and
// .wikitext() methods (subclass of Array)
var prefix = target.split(':', 1)[0].toLowerCase().trim();
if ( prefix && 'pf_' + prefix in this.parserFunctions ) {
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
var pfAttribs = new Params( this.manager.env, attribs );
pfAttribs[0] = new KV( target.substr( prefix.length + 1 ), [] );
//this.manager.env.dp( 'func prefix/args: ', prefix,
// tplExpandData.expandedArgs,
// 'unnamedArgs', tplExpandData.origToken.attribs,
// 'funcArg:', funcArg
// );
Big token transform framework overhaul part 2 * Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
2012-05-10 08:04:24 +00:00
this.manager.env.dp( 'entering prefix', target, token );
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
this.parserFunctions[ 'pf_' + prefix ]
( token, this.manager.frame, cb, pfAttribs );
return;
}
this.manager.env.tp( 'template target: ' + target );
// now normalize the target before template processing
target = this.manager.env.normalizeTitle( target );
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
// Resolve a possibly relative link
var templateName = this.manager.env.resolveTitle(
target,
'Template'
);
var checkRes = this.manager.frame.loopAndDepthCheck( templateName, this.manager.env.maxDepth );
if( checkRes ) {
// Loop detected or depth limit exceeded, abort!
res = [
checkRes,
new TagTk( 'a', [{k: 'href', v: target}] ),
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
templateName,
new EndTagTk( 'a' )
];
Big token transform framework overhaul part 2 * Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
2012-05-10 08:04:24 +00:00
res.rank = this.manager.phaseEndRank;
cb( { tokens: res } );
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
return;
}
2012-01-20 00:49:27 +00:00
// XXX: notes from brion's mediawiki.parser.environment
// resolve template name
// load template w/ canonical name
// load template w/ variant names (language variants)
// For now, just fetch the template and pass the callback for further
// processing along.
this._fetchTemplateAndTitle(
templateName,
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
cb,
this._processTemplateAndTitle.bind( this, token, frame, cb, templateName, attribs )
);
};
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
/**
* Process a fetched template source
*/
TemplateHandler.prototype._processTemplateAndTitle = function( token, frame, cb, name, attribs, src, type ) {
// Get a nested transformation pipeline for the input type. The input
// pipeline includes the tokenizer, synchronous stage-1 transforms for
// 'text/wiki' input and asynchronous stage-2 transforms).
var pipeline = this.manager.pipeFactory.getPipeline(
type || 'text/x-mediawiki', true
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
);
pipeline.setFrame( this.manager.frame, name, attribs );
// Hook up the inputPipeline output events to our handlers
pipeline.addListener( 'chunk', this._onChunk.bind ( this, cb ) );
pipeline.addListener( 'end', this._onEnd.bind ( this, cb ) );
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
// Feed the pipeline. XXX: Support different formats.
Big token transform framework overhaul part 2 * Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
2012-05-10 08:04:24 +00:00
this.manager.env.dp( 'TemplateHandler._processTemplateAndTitle', name, attribs );
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
pipeline.process ( src, name );
};
/**
* Handle chunk emitted from the input pipeline after feeding it a template
*/
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
TemplateHandler.prototype._onChunk = function( cb, chunk ) {
// We encapsulate the output by default, so collect tokens here.
Big token transform framework overhaul part 2 * Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
2012-05-10 08:04:24 +00:00
chunk = this.manager.env.stripEOFTkfromTokens( chunk );
this.manager.env.dp( 'TemplateHandler._onChunk', chunk );
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
cb( { tokens: chunk, async: true } );
};
/**
* Handle the end event emitted by the parser pipeline after fully processing
* the template source.
*/
TemplateHandler.prototype._onEnd = function( cb ) {
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
this.manager.env.dp( 'TemplateHandler._onEnd' );
cb( { tokens: [] } );
};
/**
* Fetch a template
*/
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
TemplateHandler.prototype._fetchTemplateAndTitle = function ( title, parentCB, cb ) {
// @fixme normalize name?
var self = this;
if ( title in this.manager.env.pageCache ) {
// XXX: store type too (and cache tokens/x-mediawiki)
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
cb( self.manager.env.pageCache[title] /* , type */ );
} else if ( ! this.manager.env.fetchTemplates ) {
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
parentCB( { tokens: [ 'Warning: Page/template fetching disabled, and no cache for ' +
title ] } );
} else {
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
// We are about to start an async request for a template
this.manager.env.dp( 'Note: trying to fetch ', title );
// Start a new request if none is outstanding
Big token transform framework overhaul part 2 * Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
2012-05-10 08:04:24 +00:00
//this.manager.env.dp( 'requestQueue: ', this.manager.env.requestQueue );
if ( this.manager.env.requestQueue[title] === undefined ) {
this.manager.env.tp( 'Note: Starting new request for ' + title );
this.manager.env.requestQueue[title] = new TemplateRequest( this.manager, title );
}
// Append a listener to the request at the toplevel, but prepend at
// lower levels to enforce depth-first processing
if ( false && this.manager.isInclude ) {
// prepend request: deal with requests from includes first
this.manager.env.requestQueue[title]
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
.listeners( 'src' ).unshift( cb );
} else {
// append request, process in document order
this.manager.env.requestQueue[title]
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
.listeners( 'src' ).push( cb );
}
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
parentCB ( { async: true } );
}
};
/*********************** Template argument expansion *******************/
/**
* Expand template arguments with tokens from the containing frame.
*/
2012-02-07 11:53:29 +00:00
TemplateHandler.prototype.onTemplateArg = function ( token, frame, cb ) {
new AttributeTransformManager (
this.manager,
this._returnArgAttributes.bind( this, token, cb, frame )
).process( token.attribs.slice() );
};
TemplateHandler.prototype._returnArgAttributes = function ( token, cb, frame, attributes ) {
//console.warn( '_returnArgAttributes: ' + JSON.stringify( attributes ));
Big token transform framework overhaul part 2 * Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
2012-05-10 08:04:24 +00:00
var argName = this.manager.env.tokensToString( attributes[0].k ).trim(),
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
res,
dict = this.manager.frame.args.named();
Big token transform framework overhaul part 2 * Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
2012-05-10 08:04:24 +00:00
this.manager.env.dp( 'args', argName /*, dict*/ );
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
if ( argName in dict ) {
// return tokens for argument
//console.warn( 'templateArg found: ' + argName +
// ' vs. ' + JSON.stringify( this.manager.args ) );
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
res = dict[argName];
Big token transform framework overhaul part 2 * Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
2012-05-10 08:04:24 +00:00
this.manager.env.dp( 'arg res:', res );
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
if ( res.constructor === String ) {
cb( { tokens: [res] } );
} else {
Big token transform framework overhaul part 2 * Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
2012-05-10 08:04:24 +00:00
dict[argName].get({
type: 'tokens/x-mediawiki/expanded',
cb: function( res ) { cb ( { tokens: res } ); },
asyncCB: cb
});
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
}
return;
} else {
Big token transform framework overhaul part 2 * Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
2012-05-10 08:04:24 +00:00
this.manager.env.dp( 'templateArg not found: ', argName
/*' vs. ', dict */ );
if ( attributes.length > 1 ) {
res = attributes[1].v;
} else {
//console.warn('no default for ' + argName + JSON.stringify( attributes ));
res = [ '{{{' + argName + '}}}' ];
}
}
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
cb( { tokens: res } );
};
/***************** Template fetch request helper class ********/
function TemplateRequest ( manager, title ) {
// Increase the number of maximum listeners a bit..
Biggish token transform system refactoring * All parser pipelines including tokenizer and DOM stuff are now constructed from a 'recipe' data structure in a ParserPipelineFactory. * All sub-pipelines of these can now be cached * Event registrations to a pipeline are directly forwarded to the last pipeline member to save relatively expensive event forwarding. * Some APIs for on-demand expansion / format conversion of parameters from parser functions are added: param.to('tokens/expanded', cb) param.to('text/wiki', cb) (this does not work yet) All parameters are additionally wrapped into a Param object that provides method for positional parameter naming (.named() or conversion to a dict (.dict()). * The async token transform manager is now separated from a frame object, with the frame holding arguments, an on-demand expansion method and loop checks. * Only keys of template parameters are now expanded. Parser functions or template arguments trigger an expansion on-demand. This (unsurprisingly) makes a big performance difference with typical switch-heavy template systems. * Return values from async transforms are no longer used in favor of plain callbacks. This saves the complication of having to maintain two code paths. A trick in transformTokens still avoids the construction of unneeded TokenAccumulators. * The results of template expansions are no longer buffered. * 301 parser tests are passing Known issues: * Cosmetic cleanup remains to do * Some parser functions do not support async expansions yet, and need to be modified. Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 14:35:59 +00:00
this.setMaxListeners( 50000 );
this.retries = 5;
this.manager = manager;
this.title = title;
var url = manager.env.wgScript + '/api' +
manager.env.wgScriptExtension +
'?' +
qs.stringify( {
format: 'json',
action: 'query',
prop: 'revisions',
rvprop: 'content',
titles: title
} );
//'?format=json&action=query&prop=revisions&rvprop=content&titles=' + title;
this.requestOptions = {
method: 'GET',
followRedirect: true,
url: url,
headers: {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:9.0.1) ' +
'Gecko/20100101 Firefox/9.0.1 Iceweasel/9.0.1'
}
};
// Start the request
request( this.requestOptions, this._handler.bind(this) );
}
// Inherit from EventEmitter
TemplateRequest.prototype = new events.EventEmitter();
Big token transform framework overhaul part 2 * Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
2012-05-10 08:04:24 +00:00
TemplateRequest.prototype.constructor = TemplateRequest;
TemplateRequest.prototype._handler = function (error, response, body) {
//console.warn( 'response for ' + title + ' :' + body + ':' );
var self = this;
if(error) {
this.manager.env.dp(error);
if ( this.retries ) {
this.retries--;
this.manager.env.tp( 'Retrying template request for ' + this.title );
// retry
request( this.requestOptions, this._handler.bind(this) );
} else {
this.emit('src', 'Page/template fetch failure for title ' + this.title,
'text/x-mediawiki');
}
} else if(response.statusCode === 200) {
var src = '',
data,
normalizedTitle;
try {
//console.warn( 'body: ' + body );
data = JSON.parse( body );
} catch(e) {
console.warn( "Error: while parsing result. Error was: " );
console.warn( e );
console.warn( "Response that didn't parse was:");
console.warn( "------------------------------------------\n" + body );
console.warn( "------------------------------------------" );
}
try {
$.each( data.query.pages, function(i, page) {
if (page.revisions && page.revisions.length) {
src = page.revisions[0]['*'];
normalizeTitle = page.title;
} else {
var normalName = self.manager.env.normalizeTitle(
self.manager.env.pageName );
console.warn( 'Did not find page revisions for ' + self.title );
if ( this.title === normalName ) {
src = 'No revisions for ' + self.title;
}
}
});
} catch ( e2 ) {
console.warn( 'Did not find page revisions in the returned body:' + body + e2 );
}
// check for #REDIRECT
var redirMatch = src.match( /[\r\n\s]*#\s*redirect\s*\[\[([^\]]+)\]\]/i );
if ( redirMatch ) {
var title = redirMatch[1];
var url = this.manager.env.wgScript + '/api' +
this.manager.env.wgScriptExtension +
'?' +
qs.stringify( {
format: 'json',
action: 'query',
prop: 'revisions',
rvprop: 'content',
titles: title
} );
//'?format=json&action=query&prop=revisions&rvprop=content&titles=' + title;
this.requestOptions.url = url;
request( this.requestOptions, this._handler.bind(this) );
return;
}
//console.warn( 'Page ' + title + ': got ' + src );
Big token transform framework overhaul part 2 * Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
2012-05-10 08:04:24 +00:00
this.manager.env.tp( 'Retrieved ' + this.title, src );
// Add the source to the cache
this.manager.env.pageCache[this.title] = src;
// Process only a few callbacks in each event loop iteration to
// reduce memory usage.
//
//
var listeners = this.listeners( 'src' );
var processSome = function () {
Big token transform framework overhaul part 2 * Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
2012-05-10 08:04:24 +00:00
// XXX: experiment a bit with the number of callbacks per
// iteration!
var maxIters = Math.min(1, listeners.length);
for ( var it = 0; it < maxIters; it++ ) {
var nextListener = listeners.shift();
// We only retrieve text/x-mediawiki source currently.
nextListener( src, 'text/x-mediawiki' );
}
if ( listeners.length ) {
process.nextTick( processSome );
}
};
process.nextTick( processSome );
//processSome();
//self.emit( 'src', src, title );
}
// XXX: handle other status codes
// Remove self from request queue
Big token transform framework overhaul part 2 * Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
2012-05-10 08:04:24 +00:00
//this.manager.env.dp( 'trying to remove ', this.title, ' from requestQueue' );
delete this.manager.env.requestQueue[this.title];
Big token transform framework overhaul part 2 * Tokens are now immutable. The progress of transformations is tracked on chunks instead of tokens. Tokenizer output is cached and can be directly returned without a need for cloning. Transforms are required to clone or newly create tokens they are modifying. * Expansions per chunk are now shared between equivalent frames via a cache stored on the chunk itself. Equivalence of frames is not yet ideal though, as right now a hash tree of *unexpanded* arguments is used. This should be switched to a hash of the fully expanded local parameters instead. * There is now a vastly improved maybeSyncReturn wrapper for async transforms that either forwards processing to the iterative transformTokens if the current transform is still ongoing, or manages a recursive transformation if needed. * Parameters for parser functions are now wrapped in abstract Params and ParserValue objects, which support some handy on-demand *value* expansions. Keys are always expanded. Parser functions are converted to use these interfaces, and now properly expand their values in the correct frame. Making this expansion lazier is certainly possible, but would complicate transformTokens and other token-handling machinery. Need to investigate if it would really be worth it. Dead branch elimination is certainly a bigger win overall. * Complex recursive asynchronous expansions should now be closer to correct for both the iterative (transformTokens) and recursive (maybeSyncReturn after transformTokens has returned) code paths. * Performance degraded slightly. There are no micro-optimizations done yet and the shared expansion cache still has a low hit rate. The progress tracking on chunks is not yet perfect, so there are likely a lot of unneeded re-expansions that can be easily eliminated. There is also more debug tracing right now. Obama currently expands in 54 seconds on my laptop. Change-Id: I4a603f3d3c70ca657ebda9fbb8570269f943d6b6
2012-05-10 08:04:24 +00:00
//this.manager.env.dp( 'after deletion:', this.manager.env.requestQueue );
};
/*
* XXX: The jQuery version does not quite work with node, but we keep
* it around for now.
$.ajax({
url: url,
data: {
format: 'json',
action: 'query',
prop: 'revisions',
rvprop: 'content',
titles: title
},
success: function(data, statusString, xhr) {
console.warn( 'Page ' + title + ' success ' + JSON.stringify( data ) );
var src = null, title = null;
$.each(data.query.pages, function(i, page) {
if (page.revisions && page.revisions.length) {
src = page.revisions[0]['*'];
title = page.title;
}
});
if (typeof src !== 'string') {
console.warn( 'Page ' + title + 'not found! Got ' + src );
callback( 'Page ' + title + ' not found' );
} else {
// Add to cache
console.warn( 'Page ' + title + ': got ' + src );
this.manager.env.pageCache[title] = src;
callback(src, title);
}
},
error: function(xhr, msg, err) {
console.warn( 'Page/template fetch failure for title ' +
title + ', url=' + url + JSON.stringify(xhr) + ', err=' + err );
callback('Page/template fetch failure for title ' + title);
},
dataType: 'json',
cache: false, // @fixme caching, versions etc?
crossDomain: true
});
*/
if (typeof module == "object") {
module.exports.TemplateHandler = TemplateHandler;
}