Gabriel Wicke 3be4992782 'Obama finally expands' ;) Misc fixes and documentation updates
* [[:en:Barack Obama]] can now be expanded in 77 seconds using 330MB RAM,
  while it would previously run out of RAM after ~30 minutes. Wohoooo!
  The token transform framework rework really paid off.
* 303 parser tests are passing in the new record time of 5.5 seconds. Two more
  tests are passing since these tests expect the day of the week to be
  Thursday.  Won't be the case tomorrow.

Change-Id: I56e850838476b546df10c6a239c8c9e29a1a3136
2012-04-26 18:18:08 +02:00

563 lines
18 KiB

/**
 * Some parser functions, and quite a bunch of stubs of parser functions.
 * There are still quite a few missing (the reference links that originally
 * followed here are not present in this copy).
 *
 * Instantiated and called by the TemplateHandler extension. Any pf_<prefix>
 * matching a lower-cased template name prefix up to the first colon will
 * override that template.
 *
 * TODO: Implement these more thoroughly, and test against
 * extensions/ParserFunction/
 *   convertTests.txt
 *   exprTests.txt
 *   funcsParserTests.txt
 *   stringFunctionTests.txt
 *
 * @author Gabriel Wicke <>
 */
/**
 * Container for the parser function implementations (the pf_* methods
 * attached to the prototype).
 *
 * @constructor
 * @param {Object} manager Owning object; stored as this.manager. Its `env`
 *   property is cached as this.env for the handlers.
 */
function ParserFunctions ( manager ) {
	this.manager = manager;
	this.env = manager.env;
}
/**
 * Temporary helper: turn a key/value argument back into a single sequence.
 *
 * @param {Object} kv Argument with `k` (key) and `v` (value) members.
 * @returns {*} `kv.k.concat( ['='], kv.v )` when the key is non-empty,
 *   otherwise `kv.v` untouched.
 */
ParserFunctions.prototype._rejoinKV = function ( kv ) {
	var hasKey = kv.k && kv.k.length;
	return hasKey ? kv.k.concat( ['='], kv.v ) : kv.v;
};
/**
 * #if: choose a branch depending on whether the test string is empty.
 *
 * The test string is args[0].k; when it is non-empty after trim() the
 * second argument is emitted, otherwise the third. A missing branch yields
 * an empty token list.
 *
 * @param {Object} token Unused here.
 * @param {Object} frame Unused here.
 * @param {Function} cb Receives `{ tokens: ... }`.
 * @param {Array} args Key/value arguments.
 */
ParserFunctions.prototype['pf_#if'] = function ( token, frame, cb, args ) {
	var testIsEmpty = args[0].k.trim() === '',
		branch = testIsEmpty ? args[2] : args[1];
	cb( { tokens: ( branch && this._rejoinKV( branch ) ) || [] } );
};
/**
 * Fallback lookup for #switch, invoked by pf_#switch when the direct
 * dictionary lookup misses.
 *
 * NOTE(review): this copy of the function is visibly truncated (closing
 * braces and at least a few statements are missing, see the notes below),
 * so the comments only describe what the remaining lines show.
 *
 * @param {Array} kvs Remaining key/value arguments; each entry is read
 *   through its `k` and `v` members.
 * @param {*} key Value being switched on; compared with === against
 *   trimmed strings, so presumably a string.
 * @param {Object} dict Argument dictionary; in this function it is only
 *   consulted for a '#default' entry.
 * @param {Function} cb Receives `{ tokens: ... }` on a hit, `{}` otherwise.
 * @param {*} [v] Optional candidate value; when given it is converted with
 *   env.tokensToString(), trimmed and compared against key.
 */
ParserFunctions.prototype._switchLookupFallback = function ( kvs, key, dict, cb, v ) {
var kv,
// NOTE(review): the tail of the next line ("'swl');") looks like the remnant
// of a separate call whose beginning was lost -- restore from the repository.
l = kvs.length;'swl');
this.manager.env.dp('_switchLookupFallback', kvs.length, key, v );
if ( v && key === this.manager.env.tokensToString( v ).trim() ) {
// found. now look for the next entry with a non-empty key.
this.manager.env.dp( 'switch found' );
for ( var j = 0; j < l; j++) {
kv = kvs[j];
// XXX: make sure the key is always one of these!
if ( kv.k.length ) {
return cb( { tokens: kv.v } );
// No value found, return empty string? XXX: check this
return cb( { } );
} else if ( kvs.length ) {
// No candidate matched (or none was passed): scan the remaining entries.
// The scan starts at index 1 instead of 0 when a candidate v was supplied.
var i = 0;
if ( v ) {
i = 1;
for ( ; i < l; i++ ) {
kv = kvs[i];
// NOTE(review): the body of the next branch is missing in this copy;
// presumably it skipped entries that have a key or an empty value.
if ( kv.k.length || !kv.v.length ) {
} else if ( kv.v.constructor === String ) {
if ( kv.v.trim() !== key ) {
// Shortcut
} else {
// String value equal to the key: recurse on the tail, passing the value as
// the candidate v.
return this._switchLookupFallback( kvs.slice(i), key, dict, cb, kv.v );
} else {
// NOTE(review): the start of this statement is missing; only the bound
// continuation (this function, pre-applied to the tail) survived.
this._switchLookupFallback.bind( this, kvs.slice(i), key, dict, cb ) );
// value not found!
if ( '#default' in dict ) {
cb( { tokens: dict['#default'] } );
} else if ( kvs.length ) {
// Without '#default', a last entry that has no key is used as the result.
var lastKV = kvs[kvs.length - 1];
if ( lastKV && ! lastKV.k.length ) {
cb ( { tokens: lastKV.v } );
} else {
cb ( {} );
} else {
cb ( {} );
// TODO: Implement
/**
 * #switch: look up the trimmed first argument among the remaining
 * arguments.
 *
 * A direct hit in the argument dictionary is returned synchronously.
 * Otherwise `{ async: true }` is signalled through cb and the lookup is
 * handed to _switchLookupFallback, which reports the final result through
 * the same callback.
 *
 * @param {Object} token Only used for debug output.
 * @param {Object} frame Unused here.
 * @param {Function} cb Result callback.
 * @param {Array} args Key/value arguments; must provide a dict() method.
 */
ParserFunctions.prototype['pf_#switch'] = function ( token, frame, cb, args ) {
	// `target` used to be assigned without a declaration, which leaked a
	// global shared between invocations and throws in strict mode.
	var target = args[0].k.trim(),
		dict;
	this.env.dp( 'switch enter', target, token );
	// create a dict from the remaining args
	dict = args.dict();
	// Own-property check so that values such as 'constructor' or 'toString'
	// do not match members inherited from Object.prototype.
	if ( Object.prototype.hasOwnProperty.call( dict, target ) &&
			dict[target] !== undefined ) {
		this.env.dp( 'switch found: ', target, dict, ' res=', dict[target] );
		cb ( { tokens: dict[target] } );
	} else {
		cb ( { async: true } );
		this._switchLookupFallback( args, target, dict, cb );
	}
};
// #ifeq
/**
 * #ifeq: compare args[0].k with the stringified value of args[1] (both
 * trimmed) and emit the third argument on equality, the fourth otherwise.
 * With fewer than three arguments nothing is emitted.
 *
 * @param {Object} token Unused here.
 * @param {Object} frame Unused here.
 * @param {Function} cb Receives `{ tokens: ... }` or `{}`.
 * @param {Array} args Key/value arguments.
 */
ParserFunctions.prototype['pf_#ifeq'] = function ( token, frame, cb, args ) {
	var left, right, branch;
	if ( args.length < 3 ) {
		cb( {} );
		return;
	}
	left = args[0].k.trim();
	right = this.env.tokensToString( args[1].v ).trim();
	branch = left === right ? args[2] : args[3];
	cb( { tokens: ( branch && this._rejoinKV( branch ) ) || [] } );
};
/**
 * #expr: evaluate the expression in args[0].k and emit the result as a
 * single string token. An empty expression yields an empty string; an
 * expression that fails to evaluate yields an error marker token.
 *
 * @param {Object} token Unused here.
 * @param {Object} frame Unused here.
 * @param {Function} cb Receives `{ tokens: [ string ] }`.
 * @param {Array} args Key/value arguments; args[0].k is the expression.
 */
ParserFunctions.prototype['pf_#expr'] = function ( token, frame, cb, args ) {
	var target = args[0].k,
		res = '';
	if ( target ) {
		try {
			// FIXME(security): this runs wikitext-supplied text as JavaScript.
			// Make this safe and implement MW expressions!
			res = ( new Function ( 'return (' + target + ')' ) )();
		} catch ( e ) {
			return cb( { tokens: [ 'class="error" in expression ' + target ] } );
		}
	}
	// An expression may evaluate to undefined or null; calling .toString()
	// on those used to throw outside of the try block.
	cb( { tokens: [ res === undefined || res === null ? '' : String( res ) ] } );
};
/**
 * #ifexpr: evaluate the expression in args[0].k and emit the second
 * argument when the result is truthy, the third otherwise. An empty
 * expression counts as false. If the expression cannot be evaluated, a
 * single error marker token is emitted instead.
 *
 * @param {Object} token Unused here.
 * @param {Object} frame Unused here.
 * @param {Function} cb Receives `{ tokens: ... }`; called exactly once.
 * @param {Array} args Key/value arguments.
 */
ParserFunctions.prototype['pf_#ifexpr'] = function ( token, frame, cb, args ) {
	this.env.dp( '#ifexp: ' + JSON.stringify( args ) );
	var res = null,
		target = args[0].k,
		branch;
	if ( target ) {
		try {
			// FIXME(security): this runs wikitext-supplied text as JavaScript.
			// Make this safe, and fully implement MW expressions!
			res = ( new Function ( 'return (' + target + ')' ) )();
		} catch ( e ) {
			// Return here: falling through used to invoke cb a second time
			// with the else-branch after the error had been reported.
			return cb( { tokens: [ 'class="error" in expression ' + target ] } );
		}
	}
	branch = res ? args[1] : args[2];
	cb( { tokens: ( branch && this._rejoinKV( branch ) ) || [] } );
};
/**
 * #iferror: emit the value of the second argument when args[0].k contains
 * the marker `class="error"`; otherwise emit the value of the third
 * argument, falling back to the tested string itself.
 *
 * @param {Object} token Unused here.
 * @param {Object} frame Unused here.
 * @param {Function} cb Receives `{ tokens: ... }`.
 * @param {Array} args Key/value arguments.
 */
ParserFunctions.prototype['pf_#iferror'] = function ( token, frame, cb, args ) {
	var target = args[0].k,
		hasError = target.indexOf( 'class="error"' ) >= 0,
		tokens;
	if ( hasError ) {
		tokens = ( args[1] && args[1].v ) || [];
	} else {
		tokens = ( args[2] && args[2].v ) || [ target ];
	}
	cb( { tokens: tokens } );
};
/**
 * lc: emit args[0].k converted to lower case.
 */
ParserFunctions.prototype.pf_lc = function ( token, frame, cb, args ) {
	var lowered = args[0].k.toLowerCase();
	cb( { tokens: [ lowered ] } );
};
/**
 * uc: emit args[0].k converted to upper case.
 */
ParserFunctions.prototype.pf_uc = function ( token, frame, cb, args ) {
	var raised = args[0].k.toUpperCase();
	cb( { tokens: [ raised ] } );
};
/**
 * ucfirst: emit args[0].k with its first character upper-cased. An empty
 * input produces an empty result object.
 */
ParserFunctions.prototype.pf_ucfirst = function ( token, frame, cb, args ) {
	var target = args[0].k,
		head, tail;
	if ( !target ) {
		cb( {} );
		return;
	}
	head = target[0].toUpperCase();
	tail = target.substr(1);
	cb( { tokens: [ head + tail ] } );
};
/**
 * lcfirst: emit args[0].k with its first character lower-cased. An empty
 * input produces an empty result object.
 */
ParserFunctions.prototype.pf_lcfirst = function ( token, frame, cb, args ) {
	var target = args[0].k,
		head, tail;
	if ( !target ) {
		cb( {} );
		return;
	}
	head = target[0].toLowerCase();
	tail = target.substr(1);
	cb( { tokens: [ head + tail ] } );
};
/**
 * padleft: left-pad args[0].k to the width given in args[1].v, using the
 * pad string in args[2].v (default '0').
 *
 * The pad string is repeated and truncated so that the result has exactly
 * the requested length; input that is already long enough is returned
 * unchanged. The width is capped at 500 characters so a huge width cannot
 * force an unbounded allocation. Without a positive width an empty result
 * object is returned.
 *
 * @param {Object} token Unused here.
 * @param {Object} frame Unused here.
 * @param {Function} cb Receives `{ tokens: [ string ] }` or `{}`.
 * @param {Array} args Key/value arguments.
 */
ParserFunctions.prototype.pf_padleft = function ( token, frame, cb, args ) {
	var target = args[0].k,
		width, pad, fill, missing;
	if ( !( args[1] && args[1].v > 0 ) ) {
		cb( {} );
		return;
	}
	width = Math.min( Number( args[1].v ), 500 );
	// An empty pad (e.g. an empty token list) falls back to '0'; prepending
	// an empty pad in a loop would never terminate.
	pad = String( ( args[2] && args[2].v ) || '0' ) || '0';
	fill = '';
	for ( missing = width - target.length; missing > 0; missing -= pad.length ) {
		// Truncate the last repetition so the result never overshoots.
		fill += pad.substr( 0, missing );
	}
	cb( { tokens: [ fill + target ] } );
};
/**
 * padright: right-pad args[0].k to the width given in args[1].v, using the
 * pad string in args[2].v (default '0').
 *
 * The pad string is repeated and truncated so that the result has exactly
 * the requested length; input that is already long enough is returned
 * unchanged. The width is capped at 500 characters so a huge width cannot
 * force an unbounded allocation. Without a positive width an empty result
 * object is returned.
 *
 * @param {Object} token Unused here.
 * @param {Object} frame Unused here.
 * @param {Function} cb Receives `{ tokens: [ string ] }` or `{}`.
 * @param {Array} args Key/value arguments.
 */
ParserFunctions.prototype.pf_padright = function ( token, frame, cb, args ) {
	// `pad` is declared locally; it used to be assigned as an implicit
	// global, which throws in strict mode.
	var target = args[0].k,
		width, pad, fill, missing;
	if ( !( args[1] && args[1].v > 0 ) ) {
		cb( {} );
		return;
	}
	width = Math.min( Number( args[1].v ), 500 );
	// An empty pad (e.g. an empty token list) falls back to '0'; appending
	// an empty pad in a loop would never terminate.
	pad = String( ( args[2] && args[2].v ) || '0' ) || '0';
	fill = '';
	for ( missing = width - target.length; missing > 0; missing -= pad.length ) {
		// Truncate the last repetition so the result never overshoots.
		fill += pad.substr( 0, missing );
	}
	cb( { tokens: [ target + fill ] } );
};
/**
 * #tag: wrap the value of the second argument in a start/end tag pair
 * named after args[0].k.
 *
 * @param {Object} token Unused here.
 * @param {Object} frame Unused here.
 * @param {Function} cb Receives `{ tokens: [ TagTk, ...content, EndTagTk ] }`.
 * @param {Array} args Key/value arguments; args[1] (the content) is
 *   optional.
 */
ParserFunctions.prototype['pf_#tag'] = function ( token, frame, cb, args ) {
	// TODO: handle things like {{#tag:nowiki|{{{input1|[[shouldnotbelink]]}}}}}
	var target = args[0].k,
		// A tag without content used to throw on args[1].v.
		content = args[1] ? args[1].v : [];
	cb( { tokens: [ new TagTk( target ) ].concat( content, [ new EndTagTk( target ) ] ) } );
};
// TODO: These are just quick wrappers for now, optimize!
/**
 * currentyear: emit the current date formatted with the 'Y' format
 * character.
 */
ParserFunctions.prototype.pf_currentyear = function ( token, frame, cb, args ) {
	var rendered = this._pf_time_tokens( 'Y', [], {} );
	cb( rendered );
};
/**
 * currentmonth: emit the current date formatted with the 'm' format
 * character.
 */
ParserFunctions.prototype.pf_currentmonth = function ( token, frame, cb, args ) {
	var rendered = this._pf_time_tokens( 'm', [], {} );
	cb( rendered );
};
/**
 * currentmonthname: emit the current date formatted with the 'F' format
 * character.
 */
ParserFunctions.prototype.pf_currentmonthname = function ( token, frame, cb, args ) {
	var rendered = this._pf_time_tokens( 'F', [], {} );
	cb( rendered );
};
// XXX Actually use genitive form!
/**
 * currentmonthnamegen: currently identical to currentmonthname, i.e. the
 * current date formatted with the 'F' format character.
 */
ParserFunctions.prototype.pf_currentmonthnamegen = function ( token, frame, cb, args ) {
	var rendered = this._pf_time_tokens( 'F', [], {} );
	cb( rendered );
};
/**
 * currentmonthabbrev: emit the current date formatted with the 'M' format
 * character.
 */
ParserFunctions.prototype.pf_currentmonthabbrev = function ( token, frame, cb, args ) {
	var rendered = this._pf_time_tokens( 'M', [], {} );
	cb( rendered );
};
/**
 * currentweek: emit the current date formatted with the 'W' format
 * character.
 */
ParserFunctions.prototype.pf_currentweek = function ( token, frame, cb, args ) {
	var rendered = this._pf_time_tokens( 'W', [], {} );
	cb( rendered );
};
/**
 * currentdow: emit the current date formatted with the 'w' format
 * character.
 */
ParserFunctions.prototype.pf_currentdow = function ( token, frame, cb, args ) {
	var rendered = this._pf_time_tokens( 'w', [], {} );
	cb( rendered );
};
/**
 * currentday: emit the current date formatted with the 'j' format
 * character.
 */
ParserFunctions.prototype.pf_currentday = function ( token, frame, cb, args ) {
	var rendered = this._pf_time_tokens( 'j', [], {} );
	cb( rendered );
};
/**
 * currentday2: emit the current date formatted with the 'd' format
 * character.
 */
ParserFunctions.prototype.pf_currentday2 = function ( token, frame, cb, args ) {
	var rendered = this._pf_time_tokens( 'd', [], {} );
	cb( rendered );
};
/**
 * currentdayname: emit the current date formatted with the 'l' format
 * character.
 */
ParserFunctions.prototype.pf_currentdayname = function ( token, frame, cb, args ) {
	var rendered = this._pf_time_tokens( 'l', [], {} );
	cb( rendered );
};
/**
 * currenttime: emit the current time formatted as 'H:i'.
 */
ParserFunctions.prototype.pf_currenttime = function ( token, frame, cb, args ) {
	var rendered = this._pf_time_tokens( 'H:i', [], {} );
	cb( rendered );
};
// A first approximation of time stuff.
// TODO: Implement time spec (+ 1 day etc), check if formats are complete etc.
// (The links to the full list of requirements, and to the MIT-licensed code
// this first, very rough approximation is based on, are missing from this
// copy of the file.)
/**
 * #time: format the current time using the format string in args[0].k.
 * The remaining arguments are passed on to _pf_time.
 *
 * @param {Object} token Unused here.
 * @param {Object} frame Unused here.
 * @param {Function} cb Receives `{ tokens: ... }`.
 * @param {Array} args Key/value arguments.
 */
ParserFunctions.prototype['pf_#time'] = function ( token, frame, cb, args ) {
	var formatSpec = args[0].k,
		rest = args.slice(1);
	cb ( { tokens: this._pf_time( formatSpec, rest ) } );
};
/**
 * Run _pf_time and wrap its result in a `{ tokens: ... }` object, ready to
 * be handed to a callback.
 *
 * @param {string} target Format string.
 * @param {Array} args Passed through to _pf_time.
 * @returns {Object} `{ tokens: <result of _pf_time> }`
 */
ParserFunctions.prototype._pf_time_tokens = function ( target, args ) {
	var formatted = this._pf_time( target, args );
	return { tokens: formatted };
};
/**
 * Format the current date with the (trimmed) format string.
 *
 * `args` is not used yet: formatting a date taken from the arguments was
 * only sketched in commented-out code. If formatting throws, the error is
 * reported through env.dp() and the default string form of the current
 * date is returned instead.
 *
 * @param {string} target Format string understood by Date.prototype.format.
 * @param {Array} args Currently ignored.
 * @returns {Array} Single-element array holding the formatted date.
 */
ParserFunctions.prototype._pf_time = function ( target, args ) {
	var tpl = target.trim();
	try {
		return [ new Date().format ( tpl ) ];
	} catch ( e2 ) {
		this.env.dp( 'ERROR: #time ' + e2 );
		return [ new Date().toString() ];
	}
};
// Simulates PHP's date function
// FIXME: don't patch Date.prototype!
/**
 * Format this date according to a PHP date()-style format string.
 *
 * Every character that has a handler function in Date.replaceChars is
 * replaced by that handler's result (called with the date as `this`). A
 * backslash makes the following character literal; all other characters
 * are copied unchanged. A trailing lone backslash is dropped.
 *
 * @param {string} format Format string.
 * @returns {string} The formatted date.
 */
Date.prototype.format = function ( format ) {
	var handlers = Date.replaceChars,
		out = '',
		i, ch, handler;
	for ( i = 0; i < format.length; i++ ) {
		ch = format.charAt( i );
		if ( ch === '\\' ) {
			// Consume the escaped character together with the backslash.
			// Merely peeking at the previous character mistook the character
			// after an escaped backslash ("\\\\Y") for an escaped one as well.
			i++;
			if ( i < format.length ) {
				out += format.charAt( i );
			}
			continue;
		}
		handler = Object.prototype.hasOwnProperty.call( handlers, ch ) ?
			handlers[ch] : null;
		out += typeof handler === 'function' ? handler.call( this ) : ch;
	}
	return out;
};
// XXX: support localization
/**
 * Lookup table used by Date.prototype.format: maps PHP date() format
 * characters to functions that are called with the date as `this` and
 * return the replacement. Also carries the English month and day names.
 *
 * Differences from the previous table:
 *  - N is the ISO weekday (Monday = 1 ... Sunday = 7),
 *  - t is the number of days in the month of *this* date,
 *  - L yields 1 or 0 instead of a boolean,
 *  - z is the zero-based day of the year regardless of time of day and DST,
 *  - W is the ISO-8601 week number (not zero-padded),
 *  - O and P handle offsets that are not whole hours,
 *  - U is an integer number of seconds.
 */
Date.replaceChars = ( function () {
	var MS_PER_DAY = 86400000;

	// Left-pad a non-negative integer to two digits.
	function pad2( n ) {
		return ( n < 10 ? '0' : '' ) + n;
	}

	// ISO-8601 weekday number: Monday = 1 ... Sunday = 7.
	function isoDay( date ) {
		return date.getDay() || 7;
	}

	// Copy of `date` moved to the Thursday of its ISO week. The year of
	// that Thursday is the ISO week-numbering year.
	function isoThursday( date ) {
		var d = new Date( date.valueOf() );
		d.setDate( d.getDate() - isoDay( date ) + 4 );
		return d;
	}

	// Hour on a 12-hour clock (1..12).
	function hour12( date ) {
		return date.getHours() % 12 || 12;
	}

	// UTC offset as sign, two-digit hours, separator, two-digit minutes.
	function utcOffset( date, separator ) {
		var minutes = -date.getTimezoneOffset(),
			abs = Math.abs( minutes );
		return ( minutes < 0 ? '-' : '+' ) +
			pad2( Math.floor( abs / 60 ) ) + separator + pad2( abs % 60 );
	}

	return {
		shortMonths: ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
			'Sep', 'Oct', 'Nov', 'Dec'],
		longMonths: ['January', 'February', 'March', 'April', 'May', 'June',
			'July', 'August', 'September', 'October', 'November', 'December'],
		shortDays: ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'],
		longDays: ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
			'Friday', 'Saturday'],

		// Day
		d: function () { return pad2( this.getDate() ); },
		D: function () { return Date.replaceChars.shortDays[this.getDay()]; },
		j: function () { return this.getDate(); },
		l: function () { return Date.replaceChars.longDays[this.getDay()]; },
		N: function () { return isoDay( this ); },
		S: function () {
			var day = this.getDate();
			if ( day % 10 === 1 && day !== 11 ) {
				return 'st';
			}
			if ( day % 10 === 2 && day !== 12 ) {
				return 'nd';
			}
			if ( day % 10 === 3 && day !== 13 ) {
				return 'rd';
			}
			return 'th';
		},
		w: function () { return this.getDay(); },
		z: function () {
			// Compare calendar dates in UTC so that neither the time of day
			// nor a DST switch can shift the result.
			var year = this.getFullYear();
			return Math.round( ( Date.UTC( year, this.getMonth(), this.getDate() ) -
				Date.UTC( year, 0, 1 ) ) / MS_PER_DAY );
		},

		// Week
		W: function () {
			// Week 1 is the week containing January 4th; count the weeks
			// between its Thursday and the Thursday of this date's week.
			var thursday = isoThursday( this ),
				firstThursday = isoThursday( new Date( thursday.getFullYear(), 0, 4 ) );
			return 1 + Math.round( ( thursday - firstThursday ) / ( 7 * MS_PER_DAY ) );
		},

		// Month
		F: function () { return Date.replaceChars.longMonths[this.getMonth()]; },
		m: function () { return pad2( this.getMonth() + 1 ); },
		M: function () { return Date.replaceChars.shortMonths[this.getMonth()]; },
		n: function () { return this.getMonth() + 1; },
		t: function () {
			// Day 0 of the following month is the last day of this month.
			return new Date( this.getFullYear(), this.getMonth() + 1, 0 ).getDate();
		},

		// Year
		L: function () {
			var year = this.getFullYear();
			return ( year % 400 === 0 || ( year % 100 !== 0 && year % 4 === 0 ) ) ? 1 : 0;
		},
		o: function () { return isoThursday( this ).getFullYear(); },
		Y: function () { return this.getFullYear(); },
		y: function () { return ( '' + this.getFullYear() ).slice( -2 ); },

		// Time
		a: function () { return this.getHours() < 12 ? 'am' : 'pm'; },
		A: function () { return this.getHours() < 12 ? 'AM' : 'PM'; },
		B: function () {
			return Math.floor( ( ( ( this.getUTCHours() + 1 ) % 24 ) +
				this.getUTCMinutes() / 60 +
				this.getUTCSeconds() / 3600 ) * 1000 / 24 );
		},
		g: function () { return hour12( this ); },
		G: function () { return this.getHours(); },
		h: function () { return pad2( hour12( this ) ); },
		H: function () { return pad2( this.getHours() ); },
		i: function () { return pad2( this.getMinutes() ); },
		s: function () { return pad2( this.getSeconds() ); },
		// NOTE(review): this yields three-digit milliseconds; PHP's `u` is
		// documented as microseconds. Left unchanged.
		u: function () {
			var m = this.getMilliseconds();
			return ( m < 10 ? '00' : ( m < 100 ? '0' : '' ) ) + m;
		},

		// Timezone
		e: function () { return "Not Yet Supported"; },
		I: function () { return "Not Yet Supported"; },
		O: function () { return utcOffset( this, '' ); },
		P: function () { return utcOffset( this, ':' ); },
		T: function () {
			return this.toTimeString().replace( /^.+ \(?([^\)]+)\)?$/, '$1' );
		},
		Z: function () { return -this.getTimezoneOffset() * 60; },

		// Full Date/Time
		c: function () { return this.format( "Y-m-d\\TH:i:sP" ); },
		r: function () { return this.toString(); },
		U: function () { return Math.floor( this.getTime() / 1000 ); }
	};
}() );
/**
 * localurl: build a site-relative URL for the title in args[0].k.
 *
 * The remaining arguments become the query string: `key=value` when the
 * value is not the empty string, otherwise just `key`. The parts are
 * joined with '&'.
 *
 * NOTE(review): in the reviewed copy the iteration around the per-argument
 * callback was missing, so the callback's own source text would have been
 * concatenated into the URL. It is restored here as map()/join( '&' ) --
 * please confirm against the repository. The URL is now also wrapped in an
 * array, like the tokens of every other parser function in this file.
 *
 * @param {Object} token Unused here.
 * @param {Object} frame Unused here.
 * @param {Function} cb Receives `{ tokens: [ url ] }`.
 * @param {Array} args Key/value arguments.
 */
ParserFunctions.prototype.pf_localurl = function ( token, frame, cb, args ) {
	var target = args[0].k,
		query = args.slice(1).map( function ( kv ) {
			return ( kv.v !== '' && kv.k + '=' + kv.v ) || kv.k;
		} ).join( '&' );
	cb( { tokens: [
		'/' +
		// FIXME! Figure out correct prefix to use
		//this.env.wgScriptPath +
		'index' +
		this.env.wgScriptExtension + '?title=' +
		this.env.normalizeTitle( target ) + '&' +
		query
	] } );
};
// Stub section: pick any of these and actually implement them!
// The page name and similar information should be carried around in
// this.env
/**
 * formatnum (stub): emits args[0].k unchanged; no number formatting is
 * applied yet.
 */
ParserFunctions.prototype.pf_formatnum = function ( token, frame, cb, args ) {
	cb( { tokens: [ args[0].k ] } );
};
/**
 * currentpage (stub): emits args[0].k unchanged.
 */
ParserFunctions.prototype.pf_currentpage = function ( token, frame, cb, args ) {
	cb( { tokens: [ args[0].k ] } );
};
/**
 * pagenamee (stub): emit the title in args[0].k without its namespace
 * prefix, i.e. everything after the first colon, or the whole title when
 * there is no colon.
 *
 * Previously a title without a namespace produced an empty string and a
 * title containing more than one colon was cut off at the second colon.
 *
 * TODO: the result is not URL-encoded yet.
 *
 * @param {Object} token Unused here.
 * @param {Object} frame Unused here.
 * @param {Function} cb Receives `{ tokens: [ string ] }`.
 * @param {Array} args Key/value arguments.
 */
ParserFunctions.prototype.pf_pagenamee = function ( token, frame, cb, args ) {
	var target = args[0].k,
		colon = target.indexOf( ':' ),
		name = colon >= 0 ? target.substr( colon + 1 ) : target;
	cb( { tokens: [ name ] } );
};
/**
 * fullpagename (stub): emits args[0].k, or an empty string when it is
 * missing or empty.
 */
ParserFunctions.prototype.pf_fullpagename = function ( token, frame, cb, args ) {
	var target = args[0].k,
		tokens = target ? [ target ] : [ "" ];
	cb( { tokens: tokens } );
};
/**
 * fullpagenamee (stub): emits args[0].k, or an empty string when it is
 * missing or empty.
 */
ParserFunctions.prototype.pf_fullpagenamee = function ( token, frame, cb, args ) {
	var target = args[0].k,
		tokens = target ? [ target ] : [ "" ];
	cb( { tokens: tokens } );
};
// This should be doable with the information in the environment
// (this.env) already.
/**
 * fullurl (stub): emits args[0].k, or an empty string when it is missing
 * or empty. No URL is built yet.
 */
ParserFunctions.prototype.pf_fullurl = function ( token, frame, cb, args ) {
	var target = args[0].k,
		tokens = target ? [ target ] : [ "" ];
	cb( { tokens: tokens } );
};
/**
 * urlencode: emit the trimmed args[0].k passed through
 * encodeURIComponent().
 *
 * NOTE(review): the debug statement after the declaration was garbled in
 * the reviewed copy (only its argument survived, which is a syntax error).
 * It is restored here through env.dp(), the debug helper used elsewhere in
 * this file -- confirm the original call against the repository.
 *
 * @param {Object} token Unused here.
 * @param {Object} frame Unused here.
 * @param {Function} cb Receives `{ tokens: [ string ] }`.
 * @param {Array} args Key/value arguments.
 */
ParserFunctions.prototype.pf_urlencode = function ( token, frame, cb, args ) {
	var target = args[0].k;
	this.env.dp( 'urlencode: ' + target );
	cb( { tokens: [ encodeURIComponent( target.trim() ) ] } );
};
// The following items all depend on information from the wiki, so they are
// hard to implement independently. Some might require using action=parse in
// the API to get the value. (The reference links that followed here are
// missing from this copy.)
// There might be better solutions for some of these.
/**
 * #ifexist (stub): no existence check is made; the value of the second
 * argument is always emitted (or nothing if it is absent).
 */
ParserFunctions.prototype['pf_#ifexist'] = function ( token, frame, cb, args ) {
	var thenBranch = args[1] && args[1].v;
	cb( { tokens: thenBranch || [] } );
};
/**
 * pagesize (stub): always reports '100'.
 */
ParserFunctions.prototype.pf_pagesize = function ( token, frame, cb, args ) {
	var result = { tokens: [ '100' ] };
	cb( result );
};
/**
 * sitename (stub): always reports "MediaWiki".
 */
ParserFunctions.prototype.pf_sitename = function ( token, frame, cb, args ) {
	var result = { tokens: [ "MediaWiki" ] };
	cb( result );
};
/**
 * anchorencode (stub): emits the trimmed args[0].k; no encoding is applied
 * yet.
 */
ParserFunctions.prototype.pf_anchorencode = function ( token, frame, cb, args ) {
	var trimmed = args[0].k.trim();
	cb( { tokens: [ trimmed ] } );
};
/**
 * protectionlevel (stub): always reports an empty string.
 */
ParserFunctions.prototype.pf_protectionlevel = function ( token, frame, cb, args ) {
	var result = { tokens: [''] };
	cb( result );
};
/**
 * ns (stub): emits args[0].k unchanged.
 */
ParserFunctions.prototype.pf_ns = function ( token, frame, cb, args ) {
	cb( { tokens: [ args[0].k ] } );
};
/**
 * subjectspace (stub): always reports 'Main'.
 */
ParserFunctions.prototype.pf_subjectspace = function ( token, frame, cb, args ) {
	var result = { tokens: ['Main'] };
	cb( result );
};
/**
 * talkspace (stub): always reports 'Talk'.
 */
ParserFunctions.prototype.pf_talkspace = function ( token, frame, cb, args ) {
	var result = { tokens: ['Talk'] };
	cb( result );
};
/**
 * numberofarticles (stub): always reports "1".
 */
ParserFunctions.prototype.pf_numberofarticles = function ( token, frame, cb, args ) {
	var result = { tokens: ["1"] };
	cb( result );
};
/**
 * #language (stub): emits args[0].k unchanged.
 */
ParserFunctions.prototype['pf_#language'] = function ( token, frame, cb, args ) {
	cb( { tokens: [ args[0].k ] } );
};
/**
 * contentlang (stub): always reports 'en'.
 */
ParserFunctions.prototype.pf_contentlang = function ( token, frame, cb, args ) {
	var result = { tokens: ['en'] };
	cb( result );
};
/**
 * numberoffiles (stub): always reports '2'.
 */
ParserFunctions.prototype.pf_numberoffiles = function ( token, frame, cb, args ) {
	var result = { tokens: ['2'] };
	cb( result );
};
/**
 * namespace (stub): emit the namespace prefix of the title in args[0].k,
 * i.e. the text before the first colon, or 'Main' when the title has no
 * (non-empty) prefix.
 *
 * Previously the *last* colon-separated part was returned, which is the
 * page name rather than the namespace.
 *
 * @param {Object} token Unused here.
 * @param {Object} frame Unused here.
 * @param {Function} cb Receives `{ tokens: [ string ] }`.
 * @param {Array} args Key/value arguments.
 */
ParserFunctions.prototype.pf_namespace = function ( token, frame, cb, args ) {
	var target = args[0].k,
		colon = target.indexOf( ':' ),
		ns = colon > 0 ? target.substr( 0, colon ) : 'Main';
	cb( { tokens: [ ns ] } );
};
/**
 * namespacee (stub): emit the namespace prefix of the title in args[0].k,
 * i.e. the text before the first colon, or 'Main' when the title has no
 * (non-empty) prefix.
 *
 * Previously the *last* colon-separated part was returned, which is the
 * page name rather than the namespace.
 *
 * TODO: the result is not URL-encoded yet.
 *
 * @param {Object} token Unused here.
 * @param {Object} frame Unused here.
 * @param {Function} cb Receives `{ tokens: [ string ] }`.
 * @param {Array} args Key/value arguments.
 */
ParserFunctions.prototype.pf_namespacee = function ( token, frame, cb, args ) {
	var target = args[0].k,
		colon = target.indexOf( ':' ),
		ns = colon > 0 ? target.substr( 0, colon ) : 'Main';
	cb( { tokens: [ ns ] } );
};
/**
 * pagename: emits the page name stored in the environment
 * (this.env.pageName).
 */
ParserFunctions.prototype.pf_pagename = function ( token, frame, cb, args ) {
	var name = this.env.pageName;
	cb( { tokens: [ name ] } );
};
/**
 * pagenamebase: currently identical to pagename; emits
 * this.env.pageName.
 */
ParserFunctions.prototype.pf_pagenamebase = function ( token, frame, cb, args ) {
	var name = this.env.pageName;
	cb( { tokens: [ name ] } );
};
/**
 * scriptpath: emits the script path stored in the environment
 * (this.env.wgScriptPath).
 */
ParserFunctions.prototype.pf_scriptpath = function ( token, frame, cb, args ) {
	var path = this.env.wgScriptPath;
	cb( { tokens: [ path ] } );
};
// Export the constructor when a `module` object is available; skipped
// otherwise.
if ( typeof module === "object" ) {
	module.exports.ParserFunctions = ParserFunctions;
}