Temporarily disable the sanitizer and start to support preprocessor

functionality (comments, templates, template arguments) in arbitrary
attributes. The grammar for this is still quite rough; that area will
need to be consolidated.
This commit is contained in:
Gabriel Wicke 2012-02-06 19:15:44 +00:00
parent c26243989e
commit 53bf4f2bd0
Notes: Gabriel Wicke 2012-02-27 16:40:01 +00:00
5 changed files with 85 additions and 11 deletions

View file

@ -44,7 +44,7 @@ ParserFunctions.prototype['pf_#switch'] = function ( target, argList, argDict )
// #ifeq
ParserFunctions.prototype['pf_#ifeq'] = function ( target, argList, argDict ) {
if ( ! argList.length ) {
if ( argList.length < 2 ) {
return [];
} else {
if ( target.trim() === this.manager.env.tokensToString( argList[0][1] ).trim() ) {
@ -80,7 +80,9 @@ ParserFunctions.prototype['pf_lcfirst'] = function ( target, argList, argDict )
};
// #tag parser function.
// NOTE(review): this listing appears to interleave the removed and the added
// lines of the change; the first return is presumably the old implementation
// and the second one its replacement. Read literally, the second return is
// unreachable because the first one always returns.
// The second form builds a start tag token for `target`, followed by the `v`
// member of the first argument, followed by a matching end tag token. It does
// not check that argList is non-empty before reading argList[0].
ParserFunctions.prototype['pf_#tag'] = function ( target, argList, argDict ) {
return [new TagTk(target, argList)];
return [ new TagTk( target ),
argList[0].v,
new EndTagTk( target ) ];
};
// A first approximation, anyway..

View file

@ -168,7 +168,9 @@ TemplateHandler.prototype._expandTemplate = function ( tplExpandData ) {
var prefix = target.split(':', 1)[0].toLowerCase().trim();
if ( prefix && 'pf_' + prefix in this.parserFunctions ) {
var funcArg = target.substr( prefix.length + 1 );
this.manager.env.tp( 'func prefix: ' + prefix + ' arg=' + funcArg );
this.manager.env.tp( 'func prefix: ' + prefix +
' args=' + JSON.stringify( tplExpandData.expandedArgs, null, 2) +
' funcArg=' + funcArg);
//this.manager.env.dp( 'entering prefix', funcArg, args );
res = this.parserFunctions[ 'pf_' + prefix ]( funcArg,
tplExpandData.expandedArgs, args );

View file

@ -116,10 +116,11 @@ MWParserEnvironment.prototype.tokensToString = function ( tokens ) {
}
for ( var i = 0, l = tokens.length; i < l; i++ ) {
var token = tokens[i];
if ( ! token ) {
if ( token === undefined ) {
console.trace();
this.dp( 'MWParserEnvironment.tokensToString, invalid token: ' +
JSON.stringify( token ) );
this.tp( 'MWParserEnvironment.tokensToString, invalid token: ' +
JSON.stringify( token ) +
' tokens:' + JSON.stringify( tokens, null, 2 ));
continue;
}
if ( token.constructor === String ) {

View file

@ -77,7 +77,7 @@ function ParserPipeline( env, inputType ) {
// Add token transformations..
new QuoteTransformer( this.tokenPostProcessor );
new PostExpandParagraphHandler( this.tokenPostProcessor );
new Sanitizer( this.tokenPostProcessor );
//new Sanitizer( this.tokenPostProcessor );
//var citeExtension = new Cite( this.tokenTransformer );

View file

@ -29,6 +29,36 @@
return es;
};
// Flatten a (possibly nested) list of parse results and merge runs of
// adjacent strings into single strings.
//
// - Empty strings are dropped.
// - Non-string items are kept as-is, in their original order, and terminate
//   the current run of text.
// - If the result consists of exactly one string, that string is returned
//   directly; otherwise the result array is returned (it may be empty).
var flatten_string = function ( c ) {
    var items = flatten( c ),
        result = [],
        pieces = [];

    // Emit the text collected so far as one joined string.
    var emitPieces = function () {
        if ( pieces.length > 0 ) {
            result.push( pieces.join('') );
            pieces = [];
        }
    };

    for ( var idx = 0; idx < items.length; idx++ ) {
        var item = items[idx];
        if ( item.constructor !== String ) {
            // A non-string item ends the current text run.
            emitPieces();
            result.push( item );
        } else if ( item !== '' ) {
            pieces.push( item );
        }
    }
    emitPieces();

    var isLoneString = result.length === 1 &&
        result[0].constructor === String;
    return isLoneString ? result[0] : result;
};
// Remove escaped quotes from attributes etc
// This was in the original PEG parser, but could not find anything in
// MediaWiki that supports \' and \"-style escaping. So remove? -- gwicke
@ -308,6 +338,40 @@ urltext = ( t:[^'<~[{\n\rfghimnstw|!:\]} &=]+ { return t.join(''); }
/ ' ' & ':' { return "\u00a0"; }
/ t:text_char )+
// Ordered choice between the comment, tplarg_or_template and htmlentity
// rules; the first alternative that matches wins.
directive
= comment
/ tplarg_or_template
/ htmlentity
// One or more of, in order of preference:
//  - a run of characters outside the excluded set, joined into one string,
//  - a directive,
//  - a single text_char that is not a space and where inline_breaks does not
//    match.
// The collected results are passed through flatten_string, so the rule yields
// either a plain string or a list of strings and non-string results.
spaceless_preprocessor_text
= r:( t:[^'<~[{\n\r|!\]}\t &=]+ { return t.join(''); }
/ directive
/ !inline_breaks !' ' text_char )+ {
return flatten_string ( r );
}
// One or more of, in order of preference:
//  - a run of characters outside the excluded set, joined into one string,
//  - a directive,
//  - a no_punctuation_char where inline_breaks does not match,
//  - one of '.', ':' or ',' that is not followed by space or eolf,
//  - a urlencoded_char,
//  - a literal '&' or '%'.
// The collected results are passed through flatten_string, so the rule yields
// either a plain string or a list of strings and non-string results.
link_preprocessor_text
= r:( t:[^'<~[{\n\r|!\]}\t &=]+ { return t.join(''); }
/ directive
/ !inline_breaks no_punctuation_char
/ s:[.:,] !(space / eolf) { return s }
/ urlencoded_char
/ [&%] )+ {
return flatten_string ( r );
}
// Plain text that can also contain templates, template arguments, comments
// etc. - everything that is normally handled by the preprocessor.
// Matches one or more of: a run of characters outside the excluded set
// (joined into one string), a directive, or a single text_char where
// inline_breaks does not match.
// Returns either a list of tokens, or a plain string (if nothing is to be
// processed); the collected results are passed through flatten_string.
preprocessor_text
= r:( t:[^'<~[{\n\r\t|!\]} &=]+ { return t.join(''); }
/ directive
/ !inline_breaks text_char )+ {
return flatten_string ( r );
}
/*
'//', // for protocol-relative URLs, but not in text!
'ftp://',
@ -553,6 +617,7 @@ inline_element
= //& { dp('inline_element enter' + input.substr(pos, 10)); return true; }
& '<' ( comment / xmlish_tag )
/ & '{' ( & '{{{{{' template / tplarg / template )
/ & '{' tplarg_or_template
/// & '{' ( tplarg / template )
// Eat three opening brackets as text.
/ '[[[' { return '[[[' }
@ -632,9 +697,9 @@ urllink
extlink
= "["
& { return setFlag('extlink'); }
target:(url / tplarg / template)
space*
text:inlineline?
//target:urllink
target:link_preprocessor_text
text:(space* t:inlineline { return t } )?
"]" {
clearFlag('extlink');
if ( text == '' ) {
@ -642,13 +707,15 @@ extlink
text = [ "[" + linkCount + "]" ];
linkCount++;
}
return [
var res = [
new TagTk( 'a', [
new KV('href', target),
new KV('data-type', 'external')
] ),
].concat( text
, [ new EndTagTk( 'a' )]);
//console.log( JSON.stringify( res, null, 2 ) );
return res;
}
/ "[" & { clearFlag('extlink'); return false; }
@ -713,6 +780,8 @@ ipv6_address
return flatten( a ).join('');
}
// Ordered choice: when five opening braces are ahead, try template first;
// otherwise (or if that fails) try tplarg, then template.
tplarg_or_template = & '{{{{{' template / tplarg / template
template
= "{{" target:template_param_text
params:(newline? "|" newline? p:template_param { return p })*