Temporarily disable the sanitizer and start to support preprocessor

functionality (comments, templates, template arguments) in arbitrary
attributes. The grammar for this is still quite rough and will need to be
consolidated.
This commit is contained in:
Gabriel Wicke 2012-02-06 19:15:44 +00:00
parent c26243989e
commit 53bf4f2bd0
Notes: Gabriel Wicke 2012-02-27 16:40:01 +00:00
5 changed files with 85 additions and 11 deletions

View file

@ -44,7 +44,7 @@ ParserFunctions.prototype['pf_#switch'] = function ( target, argList, argDict )
// #ifeq // #ifeq
ParserFunctions.prototype['pf_#ifeq'] = function ( target, argList, argDict ) { ParserFunctions.prototype['pf_#ifeq'] = function ( target, argList, argDict ) {
if ( ! argList.length ) { if ( argList.length < 2 ) {
return []; return [];
} else { } else {
if ( target.trim() === this.manager.env.tokensToString( argList[0][1] ).trim() ) { if ( target.trim() === this.manager.env.tokensToString( argList[0][1] ).trim() ) {
@ -80,7 +80,9 @@ ParserFunctions.prototype['pf_lcfirst'] = function ( target, argList, argDict )
}; };
ParserFunctions.prototype['pf_#tag'] = function ( target, argList, argDict ) { ParserFunctions.prototype['pf_#tag'] = function ( target, argList, argDict ) {
return [new TagTk(target, argList)]; return [ new TagTk( target ),
argList[0].v,
new EndTagTk( target ) ];
}; };
// A first approximation, anyway.. // A first approximation, anyway..

View file

@ -168,7 +168,9 @@ TemplateHandler.prototype._expandTemplate = function ( tplExpandData ) {
var prefix = target.split(':', 1)[0].toLowerCase().trim(); var prefix = target.split(':', 1)[0].toLowerCase().trim();
if ( prefix && 'pf_' + prefix in this.parserFunctions ) { if ( prefix && 'pf_' + prefix in this.parserFunctions ) {
var funcArg = target.substr( prefix.length + 1 ); var funcArg = target.substr( prefix.length + 1 );
this.manager.env.tp( 'func prefix: ' + prefix + ' arg=' + funcArg ); this.manager.env.tp( 'func prefix: ' + prefix +
' args=' + JSON.stringify( tplExpandData.expandedArgs, null, 2) +
' funcArg=' + funcArg);
//this.manager.env.dp( 'entering prefix', funcArg, args ); //this.manager.env.dp( 'entering prefix', funcArg, args );
res = this.parserFunctions[ 'pf_' + prefix ]( funcArg, res = this.parserFunctions[ 'pf_' + prefix ]( funcArg,
tplExpandData.expandedArgs, args ); tplExpandData.expandedArgs, args );

View file

@ -116,10 +116,11 @@ MWParserEnvironment.prototype.tokensToString = function ( tokens ) {
} }
for ( var i = 0, l = tokens.length; i < l; i++ ) { for ( var i = 0, l = tokens.length; i < l; i++ ) {
var token = tokens[i]; var token = tokens[i];
if ( ! token ) { if ( token === undefined ) {
console.trace(); console.trace();
this.dp( 'MWParserEnvironment.tokensToString, invalid token: ' + this.tp( 'MWParserEnvironment.tokensToString, invalid token: ' +
JSON.stringify( token ) ); JSON.stringify( token ) +
' tokens:' + JSON.stringify( tokens, null, 2 ));
continue; continue;
} }
if ( token.constructor === String ) { if ( token.constructor === String ) {

View file

@ -77,7 +77,7 @@ function ParserPipeline( env, inputType ) {
// Add token transformations.. // Add token transformations..
new QuoteTransformer( this.tokenPostProcessor ); new QuoteTransformer( this.tokenPostProcessor );
new PostExpandParagraphHandler( this.tokenPostProcessor ); new PostExpandParagraphHandler( this.tokenPostProcessor );
new Sanitizer( this.tokenPostProcessor ); //new Sanitizer( this.tokenPostProcessor );
//var citeExtension = new Cite( this.tokenTransformer ); //var citeExtension = new Cite( this.tokenTransformer );

View file

@ -29,6 +29,36 @@
return es; return es;
}; };
// Collapse a parse result into the most compact form.
//
// The argument is first passed through flatten() (defined elsewhere in this
// file). Each run of adjacent strings in the result is then joined into one
// string; empty strings are dropped, and every non-string item is kept in
// place.
//
// Returns a bare string when the whole result is exactly one string,
// otherwise the array of merged strings and non-string items (which may be
// empty).
var flatten_string = function ( c ) {
    var items = flatten( c ),
        merged = [],
        pending = [],
        count = items.length,
        idx,
        item;

    // Emit the strings buffered so far as a single joined string.
    var flushPending = function () {
        if ( pending.length ) {
            merged.push( pending.join('') );
            pending = [];
        }
    };

    for ( idx = 0; idx < count; idx++ ) {
        item = items[idx];
        if ( item.constructor !== String ) {
            // A non-string item terminates the current run of text.
            flushPending();
            merged.push( item );
        } else if ( item !== '' ) {
            pending.push( item );
        }
    }
    flushPending();

    return ( merged.length === 1 && merged[0].constructor === String ) ?
        merged[0] :
        merged;
};
// Remove escaped quotes from attributes etc // Remove escaped quotes from attributes etc
// This was in the original PEG parser, but could not find anything in // This was in the original PEG parser, but could not find anything in
// MediaWiki that supports \' and \"-style escaping. So remove? -- gwicke // MediaWiki that supports \' and \"-style escaping. So remove? -- gwicke
@ -308,6 +338,40 @@ urltext = ( t:[^'<~[{\n\rfghimnstw|!:\]} &=]+ { return t.join(''); }
/ ' ' & ':' { return "\u00a0"; } / ' ' & ':' { return "\u00a0"; }
/ t:text_char )+ / t:text_char )+
// A single non-text construct that may appear inside preprocessor text.
// Alternatives are tried in order (PEG ordered choice): a comment, then a
// template argument or template, then an HTML entity.
directive
= comment
/ tplarg_or_template
/ htmlentity
// Preprocessor text that must not contain a literal space.
// Matches one or more of, in order of preference:
//   - a run of characters outside the excluded set (which includes tab and
//     space), joined into one string;
//   - a directive;
//   - a single text_char that is neither at an inline break nor a space.
// The collected pieces are handed to flatten_string.
spaceless_preprocessor_text
= r:( t:[^'<~[{\n\r|!\]}\t &=]+ { return t.join(''); }
/ directive
/ !inline_breaks !' ' text_char )+ {
return flatten_string ( r );
}
// Preprocessor text for link targets (used as the target of extlink).
// Matches one or more of, in order of preference:
//   - a run of characters outside the excluded set, joined into one string;
//   - a directive;
//   - a no_punctuation_char that is not at an inline break;
//   - one of '.', ':' or ',' provided it is NOT followed by a space or eolf;
//   - a urlencoded_char;
//   - a literal '&' or '%'.
// The collected pieces are handed to flatten_string.
link_preprocessor_text
= r:( t:[^'<~[{\n\r|!\]}\t &=]+ { return t.join(''); }
/ directive
/ !inline_breaks no_punctuation_char
/ s:[.:,] !(space / eolf) { return s }
/ urlencoded_char
/ [&%] )+ {
return flatten_string ( r );
}
// Plain text that can also contain templates, template arguments, comments
// etc. - all the things that are normally handled by the preprocessor.
// Matches one or more of, in order of preference: a run of characters
// outside the excluded set (joined into one string), a directive, or a
// single text_char that is not at an inline break.
// Returns whatever flatten_string produces for the collected pieces: either
// a list of tokens, or a plain string (if nothing is to be processed).
preprocessor_text
= r:( t:[^'<~[{\n\r\t|!\]} &=]+ { return t.join(''); }
/ directive
/ !inline_breaks text_char )+ {
return flatten_string ( r );
}
/* /*
'//', // for protocol-relative URLs, but not in text! '//', // for protocol-relative URLs, but not in text!
'ftp://', 'ftp://',
@ -553,6 +617,7 @@ inline_element
= //& { dp('inline_element enter' + input.substr(pos, 10)); return true; } = //& { dp('inline_element enter' + input.substr(pos, 10)); return true; }
& '<' ( comment / xmlish_tag ) & '<' ( comment / xmlish_tag )
/ & '{' ( & '{{{{{' template / tplarg / template ) / & '{' ( & '{{{{{' template / tplarg / template )
/ & '{' tplarg_or_template
/// & '{' ( tplarg / template ) /// & '{' ( tplarg / template )
// Eat three opening brackets as text. // Eat three opening brackets as text.
/ '[[[' { return '[[[' } / '[[[' { return '[[[' }
@ -632,9 +697,9 @@ urllink
extlink extlink
= "[" = "["
& { return setFlag('extlink'); } & { return setFlag('extlink'); }
target:(url / tplarg / template) //target:urllink
space* target:link_preprocessor_text
text:inlineline? text:(space* t:inlineline { return t } )?
"]" { "]" {
clearFlag('extlink'); clearFlag('extlink');
if ( text == '' ) { if ( text == '' ) {
@ -642,13 +707,15 @@ extlink
text = [ "[" + linkCount + "]" ]; text = [ "[" + linkCount + "]" ];
linkCount++; linkCount++;
} }
return [ var res = [
new TagTk( 'a', [ new TagTk( 'a', [
new KV('href', target), new KV('href', target),
new KV('data-type', 'external') new KV('data-type', 'external')
] ), ] ),
].concat( text ].concat( text
, [ new EndTagTk( 'a' )]); , [ new EndTagTk( 'a' )]);
//console.log( JSON.stringify( res, null, 2 ) );
return res;
} }
/ "[" & { clearFlag('extlink'); return false; } / "[" & { clearFlag('extlink'); return false; }
@ -713,6 +780,8 @@ ipv6_address
return flatten( a ).join(''); return flatten( a ).join('');
} }
// Either a template argument or a template. When the input starts with five
// opening braces, template is tried first; otherwise (or if that fails)
// tplarg is tried, then template.
// NOTE(review): the five-brace lookahead presumably resolves the ambiguity
// between a template and a template argument at that position - confirm.
tplarg_or_template = & '{{{{{' template / tplarg / template
template template
= "{{" target:template_param_text = "{{" target:template_param_text
params:(newline? "|" newline? p:template_param { return p })* params:(newline? "|" newline? p:template_param { return p })*