Implement a few parser functions. 220 parser tests now passing.

This commit is contained in:
Gabriel Wicke 2012-01-21 20:38:13 +00:00
parent 1a6546fbca
commit 785a4af76f
Notes: Gabriel Wicke 2012-02-27 16:40:01 +00:00
6 changed files with 200 additions and 25 deletions

View file

@ -0,0 +1,145 @@
/**
 * Some parser functions.
 *
 * Every parser function lives on the prototype under the key
 * 'pf_' + <lower-cased function name> (for example 'pf_#if') and shares the
 * same signature:
 *
 *   fn( target, argList, argDict ) -> Array of tokens
 *
 * - target:  string, the text the function operates on.
 * - argList: ordered list of [key, value] pairs; value is a token list
 *            (it is handed to env.tokensToString in #ifeq).
 * - argDict: the same arguments as a plain object keyed by argument name
 *            (positional arguments are read as argDict[1], argDict[2]).
 *
 * Most functions below are placeholders that return nothing or echo their
 * input; see the FIXME / TODO notes.
 *
 * @constructor
 * @param {Object} manager Object providing manager.env.dp (debug print) and
 *                         manager.env.tokensToString.
 */
function ParserFunctions ( manager ) {
    this.manager = manager;
}

// Not referenced anywhere in this file; kept so the prototype's public shape
// does not change.
ParserFunctions.prototype.fun = {};

/**
 * #if: return the first positional argument when the trimmed target is
 * non-empty, otherwise the second. A missing branch yields [].
 */
ParserFunctions.prototype['pf_#if'] = function ( target, argList, argDict ) {
    if ( target.trim() !== '' ) {
        this.manager.env.dp( '#if, first branch', argDict[1] );
        return argDict[1] || [];
    } else {
        this.manager.env.dp( '#if, second branch', argDict[2] );
        return argDict[2] || [];
    }
};

/**
 * #switch: look the trimmed target up in argDict. Falls back to an explicit
 * '#default' entry, then to the last argument if that argument has an empty
 * key, and finally to [].
 */
ParserFunctions.prototype['pf_#switch'] = function ( target, argList, argDict ) {
    var key = target.trim(),
        // Own-property test instead of the `in` operator: `in` also matches
        // inherited members, so switching on e.g. 'toString' would return
        // Object.prototype.toString rather than falling through to the default.
        has = function ( name ) {
            return Object.prototype.hasOwnProperty.call( argDict, name );
        },
        lastKV;

    this.manager.env.dp( 'switch enter: ' + key +
            ' looking in ', argDict );
    if ( has( key ) ) {
        this.manager.env.dp( 'switch found: ' + key +
                ' res=', argDict[key] );
        return argDict[key];
    } else if ( has( '#default' ) ) {
        return argDict['#default'];
    } else {
        // A trailing argument without a key acts as the implicit default.
        lastKV = argList && argList[argList.length - 1];
        if ( lastKV && ! lastKV[0].length ) {
            return lastKV[1];
        } else {
            return [];
        }
    }
};

/**
 * #ifeq: compare the trimmed target with the trimmed string form of the first
 * argument. Returns the second argument's value when they are equal and the
 * third argument's value when they differ; a missing branch yields [].
 */
ParserFunctions.prototype['pf_#ifeq'] = function ( target, argList, argDict ) {
    var branch;
    if ( ! argList || ! argList.length ) {
        return [];
    }
    if ( target.trim() === this.manager.env.tokensToString( argList[0][1] ).trim() ) {
        branch = argList[1];
    } else {
        // The "not equal" result is the third argument. Returning argList[1]
        // here as well would make both outcomes indistinguishable.
        branch = argList[2];
    }
    return ( branch && branch[1] ) || [];
};

/** lc: lower-case the whole target. */
ParserFunctions.prototype['pf_lc'] = function ( target, argList, argDict ) {
    return [{type: 'TEXT', value: target.toLowerCase()}];
};

/** uc: upper-case the whole target. */
ParserFunctions.prototype['pf_uc'] = function ( target, argList, argDict ) {
    return [{type: 'TEXT', value: target.toUpperCase()}];
};

/** ucfirst: upper-case the first character of target; empty target yields []. */
ParserFunctions.prototype['pf_ucfirst'] = function ( target, argList, argDict ) {
    if ( target ) {
        return [{
            type: 'TEXT',
            value: target.charAt(0).toUpperCase() + target.slice(1)
        }];
    } else {
        return [];
    }
};

/** lcfirst: lower-case the first character of target; empty target yields []. */
ParserFunctions.prototype['pf_lcfirst'] = function ( target, argList, argDict ) {
    if ( target ) {
        return [{
            type: 'TEXT',
            value: target.charAt(0).toLowerCase() + target.slice(1)
        }];
    } else {
        return [];
    }
};

/** #tag: emit a single TAG token named after target, with argList as attribs. */
ParserFunctions.prototype['pf_#tag'] = function ( target, argList, argDict ) {
    return [{type: 'TAG', name: target, attribs: argList}];
};

// FIXME: not implemented yet. These are registered so that the names are
// recognized as parser functions, but they always expand to nothing.
[ '#ifexpr', '#iferror', '#expr', '#ifexist' ].forEach( function ( name ) {
    ParserFunctions.prototype['pf_' + name] = function ( target, argList, argDict ) {
        return [];
    };
} );

// Pure fake: these simply echo their target back as a single TEXT token.
// ('pagename' used to be defined twice with identical bodies; once is enough.)
[
    'formatnum',
    'currentpage',
    'pagename',
    'fullpagename',
    'fullpagenamee',
    'fullurl',
    'urlencode'
].forEach( function ( name ) {
    ParserFunctions.prototype['pf_' + name] = function ( target, argList, argDict ) {
        return [{type: 'TEXT', value: target}];
    };
} );

/** namespace: fake, always reports 'Main'. */
ParserFunctions.prototype['pf_namespace'] = function ( target, argList, argDict ) {
    return [{type: 'TEXT', value: 'Main'}];
};

// FIXME! This is just fake: the target (format string) is ignored and the
// current date is returned in the default Date#toString format.
ParserFunctions.prototype['pf_#time'] = function ( target, argList, argDict ) {
    return [{type: 'TEXT', value: new Date().toString()}];
};

// TODO: real implementations are still needed for:
//   #time, #ifexpr, #expr, #iferror, #ifexist, PAGENAME, NAMESPACE, FORMATNUM

// Export for CommonJS (node); skipped where no `module` object exists.
if ( typeof module === 'object' ) {
    module.exports.ParserFunctions = ParserFunctions;
}

View file

@ -14,6 +14,7 @@ var $ = require('jquery'),
request = require('request'),
events = require('events'),
qs = require('querystring'),
ParserFunctions = require('./ext.core.ParserFunctions.js').ParserFunctions,
AttributeTransformManager = require('./mediawiki.TokenTransformManager.js')
.AttributeTransformManager;
@ -21,6 +22,7 @@ var $ = require('jquery'),
/**
 * TemplateHandler constructor.
 *
 * Calls this.reset() and this.register( manager ), then creates the
 * ParserFunctions instance for the same manager.
 *
 * @param {Object} manager Passed through to register() and ParserFunctions.
 */
function TemplateHandler ( manager ) {
this.reset();
this.register( manager );
// Parser function implementations; _expandTemplate looks them up on this
// object under the key 'pf_' + <lower-cased target prefix>.
this.parserFunctions = new ParserFunctions( manager );
}
TemplateHandler.prototype.reset = function ( token ) {
@ -160,6 +162,30 @@ TemplateHandler.prototype._expandTemplate = function ( tplExpandData ) {
var target = this.manager.env.normalizeTitle(
this.manager.env.tokensToString( tplExpandData.target )
);
var args = this.manager.env.KVtoHash( tplExpandData.expandedArgs );
this.manager.env.dp( 'argHash: ', args );
var prefix = target.split(':', 1)[0].toLowerCase();
if ( prefix && 'pf_' + prefix in this.parserFunctions ) {
var funcArg = target.substr( prefix.length + 1 );
this.manager.env.dp( 'entering prefix', funcArg, args );
var res = this.parserFunctions[ 'pf_' + prefix ]( funcArg,
tplExpandData.expandDone, args );
// XXX: support async parser functions!
if ( tplExpandData.overallAsync ) {
this.manager.env.dp( 'TemplateHandler._expandTemplate: calling back ' +
'after parser func ' + prefix + ' with res:' + JSON.stringify( res ) );
return tplExpandData.cb( res, false );
} else {
this.manager.env.dp( 'TemplateHandler._expandTemplate: sync return ' +
'after parser func ' + prefix + ' with res:' + JSON.stringify( res ) );
return { tokens: res };
//data.reset();
}
}
var checkRes = this.manager.loopAndDepthCheck.check( target );
if( checkRes ) {
// Loop detected, abort!
@ -191,10 +217,9 @@ TemplateHandler.prototype._expandTemplate = function ( tplExpandData ) {
// 'text/wiki' input and asynchronous stage-2 transforms).
var inputPipeline = this.manager.newChildPipeline(
this.manager.inputType || 'text/wiki',
this.manager.env.KVtoHash( tplExpandData.expandedArgs ),
args,
tplExpandData.target
);
this.manager.env.dp( 'argHash:', this.manager.env.KVtoHash( tplExpandData.expandedArgs ) );
// Hook up the inputPipeline output events to our handlers
inputPipeline.addListener( 'chunk', this._onChunk.bind ( this, tplExpandData ) );
@ -301,7 +326,7 @@ TemplateHandler.prototype._fetchTemplateAndTitle = function ( title, callback, t
// Unwind the stack
process.nextTick(
function () {
callback( self.manager.env.pageCache[title], title )
callback( self.manager.env.pageCache[title], title );
}
);
} else if ( ! this.manager.env.fetchTemplates ) {
@ -314,7 +339,7 @@ TemplateHandler.prototype._fetchTemplateAndTitle = function ( title, callback, t
this.manager.env.dp( 'trying to fetch ' + title );
// Start a new request if none is outstanding
this.manager.env.dp( 'requestQueue: ', this.manager.env.requestQueue)
this.manager.env.dp( 'requestQueue: ', this.manager.env.requestQueue);
if ( this.manager.env.requestQueue[title] === undefined ) {
this.manager.env.requestQueue[title] = new TemplateRequest( this.manager, title );
}
@ -365,9 +390,9 @@ TemplateHandler.prototype._returnArgAttributes = function ( token, cb, frame, at
// ' vs. ' + JSON.stringify( this.manager.args ) );
res = this.manager.args[argName];
} else {
console.log( 'templateArg not found: ' + argName +
this.manager.env.dp( 'templateArg not found: ' + argName +
' vs. ' + JSON.stringify( this.manager.args ) );
if ( false && defaultValue.length ) {
if ( defaultValue.length ) {
res = defaultValue;
} else {
res = [{ type: 'TEXT', value: '{{{' + argName + '}}}' }];
@ -414,10 +439,12 @@ function TemplateRequest ( manager, title ) {
manager.env.dp(error);
self.emit('src', 'Page/template fetch failure for title ' + title, title);
} else if(response.statusCode == 200) {
var src = '';
var src = '',
data,
normalizedTitle;
try {
//console.log( 'body: ' + body );
var data = JSON.parse( body );
data = JSON.parse( body );
} catch(e) {
console.log( "Error: while parsing result. Error was: " );
console.log( e );
@ -426,13 +453,13 @@ function TemplateRequest ( manager, title ) {
console.log( "------------------------------------------" );
}
try {
$.each(data.query.pages, function(i, page) {
$.each( data.query.pages, function(i, page) {
if (page.revisions && page.revisions.length) {
src = page.revisions[0]['*'];
title = page.title;
normalizeTitle = page.title;
}
});
} catch ( e ) {
} catch ( e2 ) {
console.log( 'Did not find page revisions in the returned body:' + body );
src = '';
}
@ -445,7 +472,9 @@ function TemplateRequest ( manager, title ) {
// XXX: handle other status codes
// Remove self from request queue
manager.env.dp( 'trying to remove ' + title + ' from requestQueue' );
delete manager.env.requestQueue[title];
manager.env.dp( 'after deletion:', manager.env.requestQueue );
});
}

View file

@ -444,7 +444,7 @@ AsyncTokenTransformManager.prototype.transformTokens = function ( tokens, parent
phaseEndRank = 2, // XXX: parametrize!
// Prepare a new accumulator, to be used by async children (if any)
localAccum = [],
accum = new TokenAccumulator( parentCB ),
accum = new TokenAccumulator( this, parentCB ),
cb = accum.getParentCB( 'child' ),
activeAccum = null,
tokensLength = tokens.length,
@ -506,7 +506,7 @@ AsyncTokenTransformManager.prototype.transformTokens = function ( tokens, parent
// The child now switched to activeAccum, we have to create a new
// accumulator for the next potential child.
activeAccum = accum;
accum = new TokenAccumulator( activeAccum.getParentCB( 'sibling' ) );
accum = new TokenAccumulator( this, activeAccum.getParentCB( 'sibling' ) );
cb = accum.getParentCB( 'child' );
}
}
@ -831,7 +831,8 @@ AttributeTransformManager.prototype._returnAttributeKey = function ( ref, tokens
* @param {Object} next TokenAccumulator to link to
* @param {Array} (optional) tokens, init accumulator with tokens or []
*/
function TokenAccumulator ( parentCB ) {
function TokenAccumulator ( manager, parentCB ) {
this.manager = manager;
this.parentCB = parentCB;
this.accum = [];
// Wait for child and sibling by default
@ -870,7 +871,7 @@ TokenAccumulator.prototype._returnTokens = function ( reference, tokens, notYetD
//console.log( 'TokenAccumulator._returnTokens' );
if ( reference === 'child' ) {
tokens = tokens.concat( this.accum );
console.log('TokenAccumulator._returnTokens child: ' +
this.manager.env.dp('TokenAccumulator._returnTokens child: ' +
JSON.stringify( tokens, null, 2 ) +
' outstanding: ' + this.outstanding
);
@ -883,12 +884,12 @@ TokenAccumulator.prototype._returnTokens = function ( reference, tokens, notYetD
tokens = this.accum.concat( tokens );
// A sibling will transform tokens, so we don't have to do this
// again.
console.log( 'TokenAccumulator._returnTokens: sibling done and parentCB ' +
this.manager.env.dp( 'TokenAccumulator._returnTokens: sibling done and parentCB ' +
JSON.stringify( tokens ) );
this.parentCB( tokens, false );
return null;
} else if ( this.outstanding === 1 && notYetDone ) {
console.log( 'TokenAccumulator._returnTokens: sibling done and parentCB but notYetDone ' +
this.manager.env.dp( 'TokenAccumulator._returnTokens: sibling done and parentCB but notYetDone ' +
JSON.stringify( tokens ) );
// Sibling is not yet done, but child is. Return own parentCB to
// allow the sibling to go direct, and call back parent with
@ -897,7 +898,7 @@ TokenAccumulator.prototype._returnTokens = function ( reference, tokens, notYetD
return this.parentCB( tokens, true);
} else {
this.accum = this.accum.concat( tokens );
console.log( 'TokenAccumulator._returnTokens: sibling done, but not overall. notYetDone=' +
this.manager.env.dp( 'TokenAccumulator._returnTokens: sibling done, but not overall. notYetDone=' +
notYetDone + ', this.outstanding=' + this.outstanding +
', this.accum=' +
JSON.stringify( this.accum, null, 2 ) );
@ -955,7 +956,7 @@ function LoopAndDepthCheck ( parent, title ) {
*/
LoopAndDepthCheck.prototype.check = function ( title ) {
// XXX: set limit really low for testing!
if ( this.depth > 5 ) {
if ( this.depth > 40 ) {
// too deep
//console.log( 'Loopcheck: ' + JSON.stringify( this, null, 2 ) );
return 'Expansion depth limit exceeded at ';

View file

@ -116,7 +116,7 @@ MWParserEnvironment.prototype.tokensToString = function ( tokens ) {
var token = tokens[i];
if ( ! token ) {
console.trace();
console.log( 'MWParserEnvironment.tokensToString, invalid token: ' +
this.dp( 'MWParserEnvironment.tokensToString, invalid token: ' +
JSON.stringify( token ) );
continue;
}
@ -126,7 +126,7 @@ MWParserEnvironment.prototype.tokensToString = function ( tokens ) {
// strip comments and newlines
} else {
var tstring = JSON.stringify( token );
console.log ( 'MWParserEnvironment.tokensToString, non-text token: ' +
this.dp ( 'MWParserEnvironment.tokensToString, non-text token: ' +
tstring + JSON.stringify( tokens, null, 2 ) );
out.push( tstring );
}

View file

@ -20,7 +20,7 @@
wgScriptExtension: ".php",
fetchTemplates: true,
// enable/disable debug output using this switch
debug: true
debug: false
} ),
parser = new ParserPipeline( env );

View file

@ -782,7 +782,7 @@ template_param
} / c:template_param_text {
return [[], flatten( c ) ];
}
/ & '|' { return [[], []]; }
/ & [|}] { return [[], []]; }
// FIXME: handle template args and templates in key! (or even parser functions?)