Fix async template expansion, so we can now render simple pages with templates

directly to WikiDom from enwiki using a command line like this:

  echo '{{User:GWicke/Test}}' | node parse.js

Woohoo!

Complex pages with templates won't render properly yet, as noinclude /
includeonly and parser functions are not yet implemented. As a result, the
parser will run out of memory or hit the currently low expansion depth limit
as it tries to expand documentation for all templates.
This commit is contained in:
Gabriel Wicke 2012-01-19 23:43:39 +00:00
parent 2233d0a488
commit d0ece16c86
Notes: Gabriel Wicke 2012-02-27 16:40:01 +00:00
4 changed files with 185 additions and 85 deletions

View file

@ -22,7 +22,6 @@ function TemplateHandler ( manager ) {
}
TemplateHandler.prototype.reset = function ( token ) {
this.resultTokens = [];
return {token: token};
};
@ -56,8 +55,6 @@ TemplateHandler.prototype.onTemplate = function ( token, cb ) {
//console.log('onTemplate! ' + JSON.stringify( token, null, 2 ) +
// ' args: ' + JSON.stringify( this.manager.args ));
this.parentCB = cb;
// check for 'subst:'
// check for variable magic names
// check for msg, msgnw, raw magics
@ -67,10 +64,12 @@ TemplateHandler.prototype.onTemplate = function ( token, cb ) {
var templateTokenTransformData = {
args: {},
manager: this.manager,
outstanding: 1, // Avoid premature finish
cb: cb,
origToken: token,
isAsync: false
resultTokens: [],
attribsAsync: true,
overallAsync: false,
expandDone: false
},
transformCB,
i = 0,
@ -89,17 +88,22 @@ TemplateHandler.prototype.onTemplate = function ( token, cb ) {
).process( attributes );
// Unblock finish
templateTokenTransformData.outstanding--;
if ( templateTokenTransformData.outstanding === 0 ) {
//console.log( 'direct call');
if ( ! templateTokenTransformData.attribsAsync ) {
// Attributes were transformed synchronously
this.manager.env.dp( 'sync attribs for ' + JSON.stringify( token ));
// All attributes are fully expanded synchronously (no IO was needed)
return this._expandTemplate ( templateTokenTransformData );
} else {
templateTokenTransformData.isAsync = true;
// Async attribute expansion is going on
this.manager.env.dp( 'async return for ' + JSON.stringify( token ));
templateTokenTransformData.overallAsync = true;
return { async: true };
}
};
/**
* Create positional (number) keys for arguments without explicit keys
*/
TemplateHandler.prototype._nameArgs = function ( orderedArgs ) {
var n = 1,
out = [];
@ -115,37 +119,49 @@ TemplateHandler.prototype._nameArgs = function ( orderedArgs ) {
return out;
};
/**
* Callback for argument (including target) expansion in AttributeTransformManager
*/
TemplateHandler.prototype._returnAttributes = function ( templateTokenTransformData,
attributes )
{
//console.log( 'TemplateHandler._returnAttributes: ' + JSON.stringify(attributes) );
this.manager.env.dp( 'TemplateHandler._returnAttributes: ' + JSON.stringify(attributes) );
// Remove the target from the attributes
templateTokenTransformData.attribsAsync = false;
templateTokenTransformData.target = attributes[0][1];
attributes.shift();
templateTokenTransformData.expandedArgs = attributes;
if ( templateTokenTransformData.isAsync ) {
if ( templateTokenTransformData.overallAsync ) {
this._expandTemplate ( templateTokenTransformData );
}
};
/**
* Fetch, tokenize and token-transform a template after all arguments and the
* target were expanded in frame.
* target were expanded.
*/
TemplateHandler.prototype._expandTemplate = function ( templateTokenTransformData ) {
//console.log('TemplateHandler.expandTemplate: ' +
// JSON.stringify( templateTokenTransformData, null, 2 ) );
if ( ! templateTokenTransformData.target ) {
this.manager.env.dp( 'No target! ' +
JSON.stringify( templateTokenTransformData, null, 2 ) );
console.trace();
}
// First, check the target for loops
var target = this.manager.env.normalizeTitle(
this.manager.env.tokensToString( templateTokenTransformData.target )
);
if( this.manager.loopCheck.check( target ) ) {
var checkRes = this.manager.loopAndDepthCheck.check( target );
if( checkRes ) {
// Loop detected, abort!
return {
tokens: [
{
type: 'TEXT',
value: 'Template loop detected: '
value: checkRes
},
{
type: 'TAG',
@ -172,6 +188,8 @@ TemplateHandler.prototype._expandTemplate = function ( templateTokenTransformDat
//console.log( 'expanded args: ' +
// JSON.stringify( this.manager.env.KVtoHash(
// templateTokenTransformData.expandedArgs ) ) );
//console.log( 'templateTokenTransformData: ' +
// JSON.stringify( templateTokenTransformData , null ,2 ) );
var inputPipeline = this.manager.newChildPipeline(
this.manager.inputType || 'text/wiki',
@ -179,9 +197,10 @@ TemplateHandler.prototype._expandTemplate = function ( templateTokenTransformDat
templateTokenTransformData.target
);
// Hook up the inputPipeline output events to call back our parentCB.
inputPipeline.addListener( 'chunk', this._onChunk.bind ( this ) );
inputPipeline.addListener( 'end', this._onEnd.bind ( this ) );
// Hook up the inputPipeline output events to call back the parent
// callback.
inputPipeline.addListener( 'chunk', this._onChunk.bind ( this, templateTokenTransformData ) );
inputPipeline.addListener( 'end', this._onEnd.bind ( this, templateTokenTransformData ) );
// Resolve a possibly relative link
@ -189,9 +208,11 @@ TemplateHandler.prototype._expandTemplate = function ( templateTokenTransformDat
target,
'Template'
);
this._fetchTemplateAndTitle( templateName,
this._processTemplateAndTitle.bind( this, inputPipeline )
);
this._fetchTemplateAndTitle(
templateName,
this._processTemplateAndTitle.bind( this, inputPipeline ),
templateTokenTransformData
);
// Set up a pipeline:
// fetch template source -> tokenizer
@ -213,30 +234,41 @@ TemplateHandler.prototype._expandTemplate = function ( templateTokenTransformDat
// fetch from DB or interwiki
// infinite loop check
if ( this.isAsync ) {
if ( templateTokenTransformData.overallAsync ||
! templateTokenTransformData.expandDone ) {
templateTokenTransformData.overallAsync = true;
this.manager.env.dp( 'Async return from _expandTemplate for ' +
JSON.stringify ( templateTokenTransformData.target ) );
return { async: true };
} else {
return this.result;
this.manager.env.dp( 'Sync return from _expandTemplate for ' +
JSON.stringify( templateTokenTransformData.target ) + ' : ' +
JSON.stringify( templateTokenTransformData.result )
);
return templateTokenTransformData.result;
}
};
/**
* Convert AsyncTokenTransformManager output chunks to parent callbacks
* Handle chunk emitted from the input pipeline after feeding it a template
*/
TemplateHandler.prototype._onChunk = function( chunk ) {
TemplateHandler.prototype._onChunk = function( data, chunk ) {
// We encapsulate the output by default, so collect tokens here.
this.resultTokens = this.resultTokens.concat( chunk );
this.manager.env.dp( 'TemplateHandler._onChunk' + JSON.stringify( chunk ) );
data.resultTokens = data.resultTokens.concat( chunk );
};
/**
* Handle the end event emitted by the parser pipeline by calling our parentCB
* with notYetDone set to false.
* Handle the end event emitted by the parser pipeline after fully processing
* the template source.
*/
TemplateHandler.prototype._onEnd = function( token ) {
TemplateHandler.prototype._onEnd = function( data, token ) {
// Encapsulate the template in a single token, which contains all the
// information needed for the editor.
var res = this.resultTokens;
this.manager.env.dp( 'TemplateHandler._onEnd' + JSON.stringify( data.resultTokens ) );
data.expandDone = true;
var res = data.resultTokens;
// Remove 'end' token from end
if ( res.length && res[res.length - 1].type === 'END' ) {
res.pop();
@ -257,11 +289,14 @@ TemplateHandler.prototype._onEnd = function( token ) {
*/
//console.log( 'TemplateHandler._onEnd: ' + JSON.stringify( res, null, 2 ) );
if ( this.isAsync ) {
this.parentCB( res, false );
if ( data.overallAsync ) {
this.manager.env.dp( 'TemplateHandler._onEnd: calling back with res:' +
JSON.stringify( res ) );
data.cb( res, false );
} else {
this.result = { tokens: res };
this.reset();
this.manager.env.dp( 'TemplateHandler._onEnd: synchronous return!' );
data.result = { tokens: res };
//data.reset();
}
};
@ -272,7 +307,7 @@ TemplateHandler.prototype._onEnd = function( token ) {
*/
TemplateHandler.prototype._processTemplateAndTitle = function( pipeline, src, title ) {
// Feed the pipeline. XXX: Support different formats.
//console.log( 'TemplateHandler._processTemplateAndTitle: ' + src );
this.manager.env.dp( 'TemplateHandler._processTemplateAndTitle: ' + src );
pipeline.process ( src );
};
@ -281,7 +316,7 @@ TemplateHandler.prototype._processTemplateAndTitle = function( pipeline, src, ti
/**
* Fetch a template
*/
TemplateHandler.prototype._fetchTemplateAndTitle = function( title, callback ) {
TemplateHandler.prototype._fetchTemplateAndTitle = function( title, callback, data ) {
// @fixme normalize name?
var self = this;
if (title in this.manager.env.pageCache) {
@ -292,8 +327,8 @@ TemplateHandler.prototype._fetchTemplateAndTitle = function( title, callback ) {
} else {
// whee fun hack!
this.isAsync = true;
console.log( 'trying to fetch ' + title );
data.overallAsync = true;
this.manager.env.dp( 'trying to fetch ' + title );
//console.log(this.manager.env.pageCache);
var url = this.manager.env.wgScriptPath + '/api' +
this.manager.env.wgScriptExtension +
@ -301,46 +336,48 @@ TemplateHandler.prototype._fetchTemplateAndTitle = function( title, callback ) {
request({
method: 'GET',
//followRedirect: false,
followRedirect: true,
url: url,
headers: {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1 Iceweasel/9.0.1'
}
},
function (error, response, body) {
console.log( 'response for ' + title + ': ' + body );
//console.log( 'response for ' + title + ' :' + body + ':' );
if(error) {
console.log(error);
callback('Page/template fetch failure for title ' + title);
self.manager.env.dp(error);
callback('Page/template fetch failure for title ' + title, title);
return ;
}
if(response.statusCode == 200) {
try{
var src = '';
try {
//console.log( 'body: ' + body );
data = JSON.parse(body);
var src = null;
var data = JSON.parse( body );
} catch(e) {
console.log( "Error: while parsing result. Error was: " );
console.log( e );
console.log( "Response that didn't parse was:");
console.log( "------------------------------------------\n" + body );
console.log( "------------------------------------------" );
}
try {
$.each(data.query.pages, function(i, page) {
if (page.revisions && page.revisions.length) {
src = page.revisions[0]['*'];
title = page.title;
}
});
console.log( 'Page ' + title + ': got ' + src );
self.manager.env.pageCache[title] = src;
callback(src, title);
} catch(e) {
console.log("Error: while parsing result. Error was: ");
console.log(e);
console.log("Response that didn't parse was:\n" + body);
data = {
error: '',
errorWfMsg: 'chat-err-communicating-with-mediawiki',
errorMsgParams: []
};
} catch ( e ) {
console.log( 'Did not find page revisions in the returned body:' + body );
src = '';
}
console.log(data);
//console.log( 'Page ' + title + ': got ' + src );
self.manager.env.dp( 'Success for ' + title + ' :' + body + ':' );
self.manager.env.pageCache[title] = src;
callback(src, title);
self.manager.env.dp(data);
}
});
@ -389,6 +426,8 @@ TemplateHandler.prototype._fetchTemplateAndTitle = function( title, callback ) {
};
/*********************** Template argument expansion *******************/
/**
* Expand template arguments with tokens from the containing frame.
*/

View file

@ -314,7 +314,7 @@ function AsyncTokenTransformManager ( childFactories, args, env ) {
this._construct();
this._reset( args, env );
// FIXME: pass actual title?
this.loopCheck = new LoopCheck( null );
this.loopAndDepthCheck = new LoopAndDepthCheck( null );
}
// Inherit from TokenTransformManager, and thus also from EventEmitter.
@ -336,14 +336,12 @@ AsyncTokenTransformManager.prototype.newChildPipeline = function ( inputType, ar
// now set up a few things on the child AsyncTokenTransformManager.
var child = pipe.last;
// We assume that the title was already checked against this.loopCheck
// We assume that the title was already checked against this.loopAndDepthCheck
// before!
child.loopCheck = new LoopCheck (
this.env.normalizeTitle( this.env.tokensToString ( title ) ),
this.loopCheck
child.loopAndDepthCheck = new LoopAndDepthCheck (
this.loopAndDepthCheck,
this.env.normalizeTitle( this.env.tokensToString ( title ) )
);
// Same for depth!
child.depth = this.depth + 1;
return pipe;
};
@ -357,7 +355,10 @@ AsyncTokenTransformManager.prototype.newChildPipeline = function ( inputType, ar
* first stage of the pipeline, and 'last' pointing to the last stage.
*/
AsyncTokenTransformManager.prototype.getAttributePipeline = function ( inputType, args ) {
return this.childFactories.attributes( inputType, args );
var pipe = this.childFactories.attributes( inputType, args );
var child = pipe.last;
child.loopAndDepthCheck = new LoopAndDepthCheck ( this.loopAndDepthCheck, '' );
return pipe;
};
/**
@ -373,8 +374,6 @@ AsyncTokenTransformManager.prototype._reset = function ( args, env ) {
this.tailAccumulator = undefined;
// eventize: bend to event emitter callback
this.tokenCB = this._returnTokens.bind( this );
this.accum = new TokenAccumulator(null);
this.firstaccum = this.accum;
this.prevToken = undefined;
//console.log( 'AsyncTokenTransformManager args ' + JSON.stringify( args ) );
if ( ! args ) {
@ -412,13 +411,24 @@ AsyncTokenTransformManager.prototype.process = function ( tokens ) {
AsyncTokenTransformManager.prototype.onChunk = function ( tokens ) {
// Set top-level callback to next transform phase
var res = this.transformTokens ( tokens, this.tokenCB );
this.tailAccumulator = res.async;
//console.log('AsyncTokenTransformManager onChunk ', tokens);
//this.phase2TailCB( tokens, true );
if ( ! this.tailAccumulator ) {
this.emit( 'chunk', res.tokens );
} else {
this.tailAccumulator.push( res.tokens );
}
if ( res.async ) {
this.tailAccumulator = res.async;
this.tokenCB = res.async.getParentCB ( 'sibling' );
}
this.emit( 'chunk', res.tokens );
this.env.dp('AsyncTokenTransformManager onChunk ' + res.async);
//this.phase2TailCB( tokens, true );
// The next processed chunk should call back as a sibling to last
// accumulator, if any.
if ( res.async ) {
}
};
/**
@ -519,7 +529,8 @@ AsyncTokenTransformManager.prototype.transformTokens = function ( tokens, parent
*/
AsyncTokenTransformManager.prototype._returnTokens = function ( tokens, notYetDone ) {
//tokens = this._transformPhase2( this.frame, tokens, this.parentCB );
//console.log('AsyncTokenTransformManager._returnTokens, after _transformPhase2.');
this.env.dp('AsyncTokenTransformManager._returnTokens, emitting chunk: ' +
JSON.stringify( tokens ) );
this.emit( 'chunk', tokens );
@ -542,9 +553,11 @@ AsyncTokenTransformManager.prototype._returnTokens = function ( tokens, notYetDo
*/
AsyncTokenTransformManager.prototype.onEndEvent = function () {
if ( this.tailAccumulator ) {
this.env.dp( 'AsyncTokenTransformManager.onEndEvent: calling siblingDone' );
this.tailAccumulator.siblingDone();
} else {
// nothing was asynchronous, so we'll have to emit end here.
this.env.dp( 'AsyncTokenTransformManager.onEndEvent: synchronous done' );
this.emit('end');
this._reset();
}
@ -650,6 +663,8 @@ SyncTokenTransformManager.prototype.onChunk = function ( tokens ) {
}
}
}
this.env.dp( 'SyncTokenTransformManager.onChunk: emitting ' +
JSON.stringify( localAccum ) );
this.emit( 'chunk', localAccum );
};
@ -685,7 +700,7 @@ function AttributeTransformManager ( manager, callback ) {
this.callback = callback;
this.outstanding = 0;
this.kvs = [];
this.pipe = manager.getAttributePipeline( manager.args );
//this.pipe = manager.getAttributePipeline( manager.args );
}
AttributeTransformManager.prototype.process = function ( attributes ) {
@ -834,8 +849,9 @@ TokenAccumulator.prototype._returnTokens = function ( reference, tokens, notYetD
//console.log( 'TokenAccumulator._returnTokens' );
if ( reference === 'child' ) {
tokens = tokens.concat( this.accum );
//console.log('TokenAccumulator._returnTokens: ' +
// JSON.stringify( tokens, null, 2 )
//console.log('TokenAccumulator._returnTokens child: ' +
// JSON.stringify( tokens, null, 2 ) +
// ' outstanding: ' + this.outstanding
// );
this.accum = [];
// XXX: Use some marker to avoid re-transforming token chunks several
@ -862,7 +878,7 @@ TokenAccumulator.prototype._returnTokens = function ( reference, tokens, notYetD
// tokens = res.tokens.concat( this.accum );
// this.accum = [];
//}
this.parentCB( tokens, false );
this.parentCB( tokens, this.outstanding !== 0 );
return null;
} else {
// sibling
@ -870,14 +886,22 @@ TokenAccumulator.prototype._returnTokens = function ( reference, tokens, notYetD
tokens = this.accum.concat( tokens );
// A sibling will transform tokens, so we don't have to do this
// again.
this.parentCB( res.tokens, false );
//console.log( 'TokenAccumulator._returnTokens: sibling done and parentCB ' +
// JSON.stringify( tokens ) );
this.parentCB( tokens, false );
return null;
} else if ( this.outstanding === 1 && notYetDone ) {
//console.log( 'TokenAccumulator._returnTokens: sibling done and parentCB but notYetDone ' +
// JSON.stringify( tokens ) );
// Sibling is not yet done, but child is. Return own parentCB to
// allow the sibling to go direct, and call back parent with
// tokens. The internal accumulator is empty at this stage, as its
// tokens are passed to the parent when the child is done.
return this.parentCB( tokens, true);
} else {
this.accum = this.accum.concat( tokens );
//console.log( 'TokenAccumulator._returnTokens: sibling done, but not overall ' +
// JSON.stringify( tokens ) );
}
@ -913,10 +937,12 @@ TokenAccumulator.prototype.push = function ( token ) {
* @class
* @constructor
*/
function LoopCheck ( title, parent ) {
function LoopAndDepthCheck ( parent, title ) {
if ( parent ) {
this.depth = parent.depth + 1;
this.parent = parent;
} else {
this.depth = 0;
this.parent = null;
}
this.title = title;
@ -928,13 +954,19 @@ function LoopCheck ( title, parent ) {
* @method
* @param {String} Title to check.
*/
LoopCheck.prototype.check = function ( title ) {
LoopAndDepthCheck.prototype.check = function ( title ) {
// XXX: set limit really low for testing!
if ( this.depth > 6 ) {
// too deep
//console.log( 'Loopcheck: ' + JSON.stringify( this, null, 2 ) );
return 'Template expansion depth limit exceeded at ';
}
var elem = this;
do {
//console.log( 'loop check: ' + title + ' vs ' + elem.title );
if ( elem.title === title ) {
// Loop detected
return true;
return 'Template expansion loop detected at ';
}
elem = elem.parent;
} while ( elem );

View file

@ -29,6 +29,10 @@ MWParserEnvironment.prototype.lookupKV = function ( kvs, key ) {
};
MWParserEnvironment.prototype.KVtoHash = function ( kvs ) {
if ( ! kvs ) {
console.log( "Invalid kvs!: " + JSON.stringify( kvs, null, 2 ) );
return {};
}
var res = {};
for ( var i = 0, l = kvs.length; i < l; i++ ) {
var kv = kvs[i],
@ -106,13 +110,21 @@ MWParserEnvironment.prototype.tokensToString = function ( tokens ) {
}
for ( var i = 0, l = tokens.length; i < l; i++ ) {
var token = tokens[i];
//console.log( 'MWParserEnvironment.tokensToString, token: ' + JSON.stringify( token ) );
if ( ! token ) {
console.trace();
console.log( 'MWParserEnvironment.tokensToString, invalid token: ' +
JSON.stringify( token ) );
continue;
}
if ( token.type === 'TEXT' ) {
out.push( token.value );
} else if ( token.type === 'COMMENT' || token.type === 'NEWLINE' ) {
// strip comments and newlines
} else {
var tstring = JSON.stringify( token );
//console.log ( 'MWParserEnvironment.tokensToString, non-text token: ' + tstring );
//out.push( tstring );
console.log ( 'MWParserEnvironment.tokensToString, non-text token: ' +
tstring + JSON.stringify( tokens, null, 2 ) );
out.push( tstring );
}
}
//console.log( 'MWParserEnvironment.tokensToString result: ' + out.join('') );
@ -120,6 +132,16 @@ MWParserEnvironment.prototype.tokensToString = function ( tokens ) {
};
/**
 * Debug print helper.
 *
 * When this.debug is truthy, serializes everything passed to the call (the
 * whole arguments object) as pretty-printed JSON with two-space indentation
 * and writes it to the console. When this.debug is falsy, does nothing.
 */
MWParserEnvironment.prototype.dp = function ( ) {
	// Stay silent unless debugging was switched on for this environment.
	if ( ! this.debug ) {
		return;
	}
	var dump = JSON.stringify( arguments, null, 2 );
	console.log( dump );
};
if (typeof module == "object") {
module.exports.MWParserEnvironment = MWParserEnvironment;

View file

@ -11,7 +11,8 @@
DOMConverter = require('./mediawiki.DOMConverter.js').DOMConverter,
optimist = require('optimist');
var parser = new ParserPipeline( new ParserEnv({ fetchTemplates: true }) );
var env = new ParserEnv( { fetchTemplates: true } ),
parser = new ParserPipeline( env );
process.stdin.resume();
@ -33,6 +34,12 @@
process.stdout.write( output );
// add a trailing newline for shell user's benefit
process.stdout.write( "\n" );
if ( env.debug ) {
// Also print out the html
process.stderr.write( document.body.innerHTML );
process.stderr.write( "\n" );
}
process.exit(0);
});
// Kick off the pipeline by feeding the input into the parser pipeline