mediawiki-extensions-Visual.../modules/parser/ext.core.ParserFunctions.js
Gabriel Wicke 8368e17d6a Biggish token transform system refactoring
* All parser pipelines including tokenizer and DOM stuff are now constructed
  from a 'recipe' data structure in a ParserPipelineFactory.

* All sub-pipelines of these can now be cached

* Event registrations to a pipeline are directly forwarded to the last
  pipeline member to save relatively expensive event forwarding.

* Some APIs for on-demand expansion / format conversion of parameters from
  parser functions are added:

  param.to('tokens/expanded', cb)
  param.to('text/wiki', cb) (this does not work yet)

  All parameters are additionally wrapped into a Param object that provides
  method for positional parameter naming (.named() or conversion to a dict
  (.dict()).

* The async token transform manager is now separated from a frame object, with
  the frame holding arguments, an on-demand expansion method and loop checks.

* Only keys of template parameters are now expanded. Parser functions or
  template arguments trigger an expansion on-demand. This (unsurprisingly)
  makes a big performance difference with typical switch-heavy template
  systems.

* Return values from async transforms are no longer used in favor of plain
  callbacks. This saves the complication of having to maintain two code paths.
  A trick in transformTokens still avoids the construction of unneeded
  TokenAccumulators.

* The results of template expansions are no longer buffered.

* 301 parser tests are passing

Known issues:

* Cosmetic cleanup remains to do
* Some parser functions do not support async expansions yet, and need to be
  modified.

Change-Id: I1a7690baffbe8141cadf67270904a1b2e1df879a
2012-04-25 16:51:36 +02:00

562 lines
18 KiB
JavaScript

/**
* Some parser functions, and quite a bunch of stubs of parser functions.
* There are still quite a few missing, see
* http://www.mediawiki.org/wiki/Help:Magic_words and
* http://www.mediawiki.org/wiki/Help:Extension:ParserFunctions.
* Instantiated and called by the TemplateHandler extension. Any pf_<prefix>
* matching a lower-cased template name prefix up to the first colon will
* override that template.
*
* TODO: Implement these more thoroughly, and test against
* extensions/ParserFunction/
* convertTests.txt
* exprTests.txt
* funcsParserTests.txt
* stringFunctionTests.txt
*
* @author Gabriel Wicke <gwicke@wikimedia.org>
*/
function ParserFunctions ( manager ) {
this.manager = manager;
this.env = manager.env;
}
// Temporary helper.
ParserFunctions.prototype._rejoinKV = function ( kv ) {
if ( kv.k && kv.k.length ) {
return kv.k.concat( ['='], kv.v );
} else {
return kv.v;
}
};
ParserFunctions.prototype['pf_#if'] = function ( token, frame, cb, args ) {
var target = args[0].k;
if ( target.trim() !== '' ) {
//this.env.dp('#if, first branch', target.trim(), argDict[1] );
cb( { tokens: (args[1] && this._rejoinKV( args[1] ) || [] ) } );
} else {
//this.env.dp('#if, second branch', target.trim(), argDict[2] );
cb( { tokens: (args[2] && this._rejoinKV( args[2] ) || [] ) } );
}
};
ParserFunctions.prototype._switchLookupFallback = function ( kvs, key, dict, cb, v ) {
var kv,
l = kvs.length;
this.manager.env.dp('_switchLookupFallback', kvs.length, key, v );
if ( v && key === this.manager.env.tokensToString( v ).trim() ) {
// found. now look for the next entry with a non-empty key.
this.manager.env.dp( 'switch found' );
for ( var j = 0; j < l; j++) {
kv = kvs[j];
// XXX: make sure the key is always one of these!
if ( kv.k.length ) {
return cb( { tokens: kv.v } );
}
}
// No value found, return empty string? XXX: check this
return cb( { } );
} else if ( kvs.length ) {
var i = 0;
if ( v ) {
i = 1;
}
for ( ; i < l; i++ ) {
kv = kvs[i];
if ( kv.k.length || !kv.v.length ) {
continue;
} else if ( kv.v.constructor === String ) {
if ( kv.v.trim() !== key ) {
// Shortcut
continue
} else {
return this._switchLookupFallback( kvs.slice(i), key, dict, cb, kv.v );
}
} else {
return kv.v.to('tokens/expanded',
this._switchLookupFallback.bind( this, kvs.slice(i), key, dict, cb ) );
}
}
// value not found!
if ( '#default' in dict ) {
cb( { tokens: dict['#default'] } );
} else if ( kvs.length ) {
var lastKV = kvs[kvs.length - 1];
if ( lastKV && ! lastKV.k.length ) {
cb ( { tokens: lastKV.v } );
} else {
cb ( {} );
}
} else {
cb ( {} );
}
}
};
// TODO: Implement
// http://www.mediawiki.org/wiki/Help:Extension:ParserFunctions#Grouping_results
ParserFunctions.prototype['pf_#switch'] = function ( token, frame, cb, args ) {
target = args[0].k.trim();
this.env.dp( 'switch enter', target, token );
// create a dict from the remaining args
args.shift();
var dict = args.dict();
if ( dict[target] !== undefined ) {
this.env.dp( 'switch found: ', target, dict, ' res=', dict[target] );
cb ( {tokens: dict[target] } );
} else {
cb ( { async: true } );
this._switchLookupFallback( args, target, dict, cb );
}
};
// #ifeq
ParserFunctions.prototype['pf_#ifeq'] = function ( token, frame, cb, args ) {
if ( args.length < 3 ) {
cb( {} );
} else {
if ( args[0].k.trim() === this.env.tokensToString( args[1].v ).trim() ) {
cb( { tokens: ( args[2] && this._rejoinKV( args[2] ) || [] ) } );
} else {
cb( { tokens: ( args[3] && this._rejoinKV( args[3] ) || [] ) } );
}
}
};
ParserFunctions.prototype['pf_#expr'] = function ( token, frame, cb, args ) {
var res,
target = args[0].k;
if ( target ) {
try {
// FIXME: make this safe and implement MW expressions!
var f = new Function ( 'return (' + target + ')' );
res = f();
} catch ( e ) {
return cb( { tokens: [ 'class="error" in expression ' + target ] } );
}
} else {
res = '';
}
cb( { tokens: [ res.toString() ] } );
};
ParserFunctions.prototype['pf_#ifexpr'] = function ( token, frame, cb, args ) {
this.env.dp( '#ifexp: ' + JSON.stringify( args ) );
var res = null,
target = args[0].k;
if ( target ) {
try {
// FIXME: make this safe, and fully implement MW expressions!
var f = new Function ( 'return (' + target + ')' );
res = f();
} catch ( e ) {
cb( { tokens: [ 'class="error" in expression ' + target ] } );
}
}
if ( res ) {
cb( { tokens: args[1] && this._rejoinKV( args[1] ) || [] } );
} else {
cb( { tokens: args[2] && this._rejoinKV( args[2] ) || [] } );
}
};
ParserFunctions.prototype['pf_#iferror'] = function ( token, frame, cb, args ) {
var target = args[0].k;
if ( target.indexOf( 'class="error"' ) >= 0 ) {
cb( { tokens: args[1] && args[1].v || [] } );
} else {
cb( { tokens: args[2] && args[2].v || [ target ] } );
}
};
ParserFunctions.prototype.pf_lc = function ( token, frame, cb, args ) {
cb( { tokens: [ args[0].k.toLowerCase() ] } );
};
ParserFunctions.prototype.pf_uc = function ( token, frame, cb, args ) {
cb( { tokens: [ args[0].k.toUpperCase() ] } );
};
ParserFunctions.prototype.pf_ucfirst = function ( token, frame, cb, args ) {
var target = args[0].k;
if ( target ) {
cb( { tokens: [ target[0].toUpperCase() + target.substr(1) ] } );
} else {
cb( {} );
}
};
ParserFunctions.prototype.pf_lcfirst = function ( token, frame, cb, args ) {
var target = args[0].k;
if ( target ) {
cb( { tokens: [ target[0].toLowerCase() + target.substr(1) ] } );
} else {
cb( {} );
}
};
ParserFunctions.prototype.pf_padleft = function ( token, frame, cb, args ) {
var target = args[0].k,
pad;
if ( args[1] && args[1].v > 0) {
if ( args[2] && args[2].v ) {
pad = args[2].v;
} else {
pad = '0';
}
var n = args[1].v;
while ( target.length < n ) {
target = pad + target;
}
cb( { tokens: [target] } );
} else {
cb( {} );
}
};
ParserFunctions.prototype.pf_padright = function ( token, frame, cb, args ) {
var target = args[0].k;
if ( args[1] && args[1].v > 0) {
if ( args[2] && args[2].v ) {
pad = args[2].v;
} else {
pad = '0';
}
var n = args[1].v;
while ( target.length < n ) {
target = target + pad;
}
cb( { tokens: [target] } );
} else {
cb( {} );
}
};
ParserFunctions.prototype['pf_#tag'] = function ( token, frame, cb, args ) {
// TODO: handle things like {{#tag:nowiki|{{{input1|[[shouldnotbelink]]}}}}}
// https://www.mediawiki.org/wiki/Future/Parser_development#Token_stream_transforms
var target = args[0].k;
cb( { tokens: ( [ new TagTk( target ) ]
.concat( args[1].v,
[ new EndTagTk( target ) ] ) ) } );
};
// TODO: These are just quick wrappers for now, optimize!
ParserFunctions.prototype.pf_currentyear = function ( token, frame, cb, args ) {
cb( this._pf_time_tokens( 'Y', [], {} ) );
};
ParserFunctions.prototype.pf_currentmonth = function ( token, frame, cb, args ) {
cb( this._pf_time_tokens( 'm', [], {} ) );
};
ParserFunctions.prototype.pf_currentmonthname = function ( token, frame, cb, args ) {
cb( this._pf_time_tokens( 'F', [], {} ) );
};
// XXX Actually use genitive form!
ParserFunctions.prototype.pf_currentmonthnamegen = function ( token, frame, cb, args ) {
cb( this._pf_time_tokens( 'F', [], {} ) );
};
ParserFunctions.prototype.pf_currentmonthabbrev = function ( token, frame, cb, args ) {
cb( this._pf_time_tokens( 'M', [], {} ) );
};
ParserFunctions.prototype.pf_currentweek = function ( token, frame, cb, args ) {
cb( this._pf_time_tokens( 'W', [], {} ) );
};
ParserFunctions.prototype.pf_currentdow = function ( token, frame, cb, args ) {
cb( this._pf_time_tokens( 'w', [], {} ) );
};
ParserFunctions.prototype.pf_currentday = function ( token, frame, cb, args ) {
cb( this._pf_time_tokens( 'j', [], {} ) );
};
ParserFunctions.prototype.pf_currentday2 = function ( token, frame, cb, args ) {
cb( this._pf_time_tokens( 'd', [], {} ) );
};
ParserFunctions.prototype.pf_currentdayname = function ( token, frame, cb, args ) {
cb( this._pf_time_tokens( 'l', [], {} ) );
};
ParserFunctions.prototype.pf_currenttime = function ( token, frame, cb, args ) {
cb( this._pf_time_tokens( 'H:i', [], {} ) );
};
// A first approximation of time stuff.
// TODO: Implement time spec (+ 1 day etc), check if formats are complete etc.
// See http://www.mediawiki.org/wiki/Help:Extension:ParserFunctions#.23time
// for the full list of requirements!
//
// First (very rough) approximation below based on
// http://jacwright.com/projects/javascript/date_format/, MIT licensed.
ParserFunctions.prototype['pf_#time'] = function ( token, frame, cb, args ) {
cb ( { tokens: this._pf_time( args[0].k, args.slice(1) ) } );
};
ParserFunctions.prototype._pf_time_tokens = function ( target, args ) {
return { tokens: this._pf_time( target, args ) };
};
ParserFunctions.prototype._pf_time = function ( target, args ) {
var res,
tpl = target.trim();
//try {
// var date = new Date( this.env.tokensToString( args[1].v ) );
// res = [ date.format( target ) ];
//} catch ( e ) {
// this.env.dp( 'ERROR: #time ' + e );
try {
res = [ new Date().format ( tpl ) ];
} catch ( e2 ) {
this.env.dp( 'ERROR: #time ' + e2 );
res = [ new Date().toString() ];
}
return res;
};
// Simulates PHP's date function
// FIXME: don't patch Date.prototype!
Date.prototype.format = function(format) {
var returnStr = '';
var replace = Date.replaceChars;
for (var i = 0; i < format.length; i++) {
var curChar = format.charAt(i);
if (i - 1 >= 0 && format.charAt(i - 1) == "\\") {
returnStr += curChar;
}
else if (replace[curChar]) {
returnStr += replace[curChar].call(this);
} else if (curChar != "\\"){
returnStr += curChar;
}
}
return returnStr;
};
// XXX: support localization
Date.replaceChars = {
shortMonths: ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
'Sep', 'Oct', 'Nov', 'Dec'],
longMonths: ['January', 'February', 'March', 'April', 'May', 'June',
'July', 'August', 'September', 'October', 'November', 'December'],
shortDays: ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'],
longDays: ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
'Friday', 'Saturday'],
// Day
d: function() { return (this.getDate() < 10 ? '0' : '') + this.getDate(); },
D: function() { return Date.replaceChars.shortDays[this.getDay()]; },
j: function() { return this.getDate(); },
l: function() { return Date.replaceChars.longDays[this.getDay()]; },
N: function() { return this.getDay() + 1; },
S: function() {
return (this.getDate() % 10 == 1 &&
this.getDate() != 11 ? 'st' : (this.getDate() % 10 == 2 &&
this.getDate() != 12 ? 'nd' : (this.getDate() % 10 == 3 &&
this.getDate() != 13 ? 'rd' : 'th')));
},
w: function() { return this.getDay(); },
z: function() {
var d = new Date(this.getFullYear(),0,1);
return Math.ceil((this - d) / 86400000);
},
// Week
W: function() {
var d = new Date(this.getFullYear(), 0, 1);
return Math.ceil((((this - d) / 86400000) + d.getDay() + 1) / 7);
},
// Month
F: function() { return Date.replaceChars.longMonths[this.getMonth()]; },
m: function() { return (this.getMonth() < 9 ? '0' : '') + (this.getMonth() + 1); },
M: function() { return Date.replaceChars.shortMonths[this.getMonth()]; },
n: function() { return this.getMonth() + 1; },
t: function() {
var d = new Date();
return new Date(d.getFullYear(), d.getMonth(), 0).getDate();
},
// Year
L: function() {
var year = this.getFullYear();
return (year % 400 === 0 || (year % 100 !== 0 && year % 4 === 0));
},
o: function() {
var d = new Date(this.valueOf());
d.setDate(d.getDate() - ((this.getDay() + 6) % 7) + 3);
return d.getFullYear();
},
Y: function() { return this.getFullYear(); },
y: function() { return ('' + this.getFullYear()).substr(2); },
// Time
a: function() { return this.getHours() < 12 ? 'am' : 'pm'; },
A: function() { return this.getHours() < 12 ? 'AM' : 'PM'; },
B: function() {
return Math.floor((((this.getUTCHours() + 1) % 24) +
this.getUTCMinutes() / 60 +
this.getUTCSeconds() / 3600) * 1000 / 24);
},
g: function() { return this.getHours() % 12 || 12; },
G: function() { return this.getHours(); },
h: function() {
return ((this.getHours() % 12 || 12) < 10 ? '0' : '') +
(this.getHours() % 12 || 12);
},
H: function() { return (this.getHours() < 10 ? '0' : '') + this.getHours(); },
i: function() { return (this.getMinutes() < 10 ? '0' : '') + this.getMinutes(); },
s: function() { return (this.getSeconds() < 10 ? '0' : '') + this.getSeconds(); },
u: function() {
var m = this.getMilliseconds();
return (m < 10 ? '00' : (m < 100 ? '0' : '')) + m;
},
// Timezone
e: function() { return "Not Yet Supported"; },
I: function() { return "Not Yet Supported"; },
O: function() {
return (-this.getTimezoneOffset() < 0 ? '-' : '+') +
(Math.abs(this.getTimezoneOffset() / 60) < 10 ? '0' : '') +
(Math.abs(this.getTimezoneOffset() / 60)) + '00';
},
P: function() {
return (-this.getTimezoneOffset() < 0 ? '-' : '+') +
(Math.abs(this.getTimezoneOffset() / 60) < 10 ? '0' : '') +
(Math.abs(this.getTimezoneOffset() / 60)) + ':00';
},
T: function() {
var m = this.getMonth();
this.setMonth(0);
var result = this.toTimeString().replace(/^.+ \(?([^\)]+)\)?$/, '$1');
this.setMonth(m);
return result;
},
Z: function() { return -this.getTimezoneOffset() * 60; },
// Full Date/Time
c: function() { return this.format("Y-m-d\\TH:i:sP"); },
r: function() { return this.toString(); },
U: function() { return this.getTime() / 1000; }
};
ParserFunctions.prototype.pf_localurl = function ( token, frame, cb, args ) {
var target = args[0].k;
args = args.slice(1);
cb( { tokens: (
'/' +
// FIXME! Figure out correct prefix to use
//this.env.wgScriptPath +
'index' +
this.env.wgScriptExtension + '?title=' +
this.env.normalizeTitle( target ) + '&' +
args.map(
function( kv ) {
//console.warn( JSON.stringify( kv ) );
return (kv.v !== '' && kv.k + '=' + kv.v ) || kv.k;
}
).join('&')
) } );
};
/**
* Stub section: Pick any of these and actually implement them!
*/
// The page name and similar information should be carried around in
// this.env
ParserFunctions.prototype.pf_formatnum = function ( token, frame, cb, args ) {
var target = args[0].k;
cb( { tokens: [ target ] } );
};
ParserFunctions.prototype.pf_currentpage = function ( token, frame, cb, args ) {
var target = args[0].k;
cb( { tokens: [ target ] } );
};
ParserFunctions.prototype.pf_pagenamee = function ( token, frame, cb, args ) {
var target = args[0].k;
cb( { tokens: [ target.split(':', 2)[1] || '' ] } );
};
ParserFunctions.prototype.pf_fullpagename = function ( token, frame, cb, args ) {
var target = args[0].k;
cb( { tokens: target && [target] || ["http://example.com/fixme/"] } );
};
ParserFunctions.prototype.pf_fullpagenamee = function ( token, frame, cb, args ) {
var target = args[0].k;
cb( { tokens: target && [target] || ["http://example.com/fixme/"] } );
};
// This should be doable with the information in the envirionment
// (this.env) already.
ParserFunctions.prototype.pf_fullurl = function ( token, frame, cb, args ) {
var target = args[0].k;
cb( { tokens: target && [target] || ["http://example.com/fixme/"] } );
};
ParserFunctions.prototype.pf_urlencode = function ( token, frame, cb, args ) {
var target = args[0].k;
this.env.tp( 'urlencode: ' + target );
cb( { tokens: [encodeURIComponent(target.trim())] } );
};
// The following items all depends on information from the Wiki, so are hard
// to implement independently. Some might require using action=parse in the
// API to get the value. See
// http://www.mediawiki.org/wiki/Parsoid#Token_stream_transforms,
// http://etherpad.wikimedia.org/ParserNotesExtensions and
// http://www.mediawiki.org/wiki/Wikitext_parser/Environment.
// There might be better solutions for some of these.
ParserFunctions.prototype['pf_#ifexist'] = function ( token, frame, cb, args ) {
cb( { tokens: ( args[1] && args[1].v ) || [] } );
};
ParserFunctions.prototype.pf_pagesize = function ( token, frame, cb, args ) {
cb( { tokens: [ '100' ] } );
};
ParserFunctions.prototype.pf_sitename = function ( token, frame, cb, args ) {
cb( { tokens: [ "MediaWiki" ] } );
};
ParserFunctions.prototype.pf_anchorencode = function ( token, frame, cb, args ) {
var target = args[0].k;
cb( { tokens: [ target.trim() ] } );
};
ParserFunctions.prototype.pf_protectionlevel = function ( token, frame, cb, args ) {
cb( { tokens: [''] } );
};
ParserFunctions.prototype.pf_ns = function ( token, frame, cb, args ) {
var target = args[0].k;
cb( { tokens: [target] } );
};
ParserFunctions.prototype.pf_subjectspace = function ( token, frame, cb, args ) {
cb( { tokens: ['Main'] } );
};
ParserFunctions.prototype.pf_talkspace = function ( token, frame, cb, args ) {
cb( { tokens: ['Talk'] } );
};
ParserFunctions.prototype.pf_numberofarticles = function ( token, frame, cb, args ) {
cb( { tokens: ["1"] } );
};
ParserFunctions.prototype['pf_#language'] = function ( token, frame, cb, args ) {
var target = args[0].k;
cb( { tokens: [target] } );
};
ParserFunctions.prototype.pf_contentlang = function ( token, frame, cb, args ) {
cb( { tokens: ['en'] } );
};
ParserFunctions.prototype.pf_numberoffiles = function ( token, frame, cb, args ) {
cb( { tokens: ['2'] } );
};
ParserFunctions.prototype.pf_namespace = function ( token, frame, cb, args ) {
var target = args[0].k;
cb( { tokens: [target.split(':').pop() || 'Main'] } );
};
ParserFunctions.prototype.pf_namespacee = function ( token, frame, cb, args ) {
var target = args[0].k;
cb( { tokens: [target.split(':').pop() || 'Main'] } );
};
ParserFunctions.prototype.pf_pagename = function ( token, frame, cb, args ) {
cb( { tokens: [this.env.pageName] } );
};
ParserFunctions.prototype.pf_pagenamebase = function ( token, frame, cb, args ) {
cb( { tokens: [this.env.pageName] } );
};
ParserFunctions.prototype.pf_scriptpath = function ( token, frame, cb, args ) {
cb( { tokens: [this.env.wgScriptPath] } );
};
if (typeof module == "object") {
module.exports.ParserFunctions = ParserFunctions;
}