diff --git a/modules/parser/ext.core.ParserFunctions.js b/modules/parser/ext.core.ParserFunctions.js index 176dd9443a..e153adef36 100644 --- a/modules/parser/ext.core.ParserFunctions.js +++ b/modules/parser/ext.core.ParserFunctions.js @@ -44,7 +44,7 @@ ParserFunctions.prototype['pf_#switch'] = function ( target, argList, argDict ) // #ifeq ParserFunctions.prototype['pf_#ifeq'] = function ( target, argList, argDict ) { - if ( ! argList.length ) { + if ( argList.length < 2 ) { return []; } else { if ( target.trim() === this.manager.env.tokensToString( argList[0][1] ).trim() ) { @@ -80,7 +80,9 @@ ParserFunctions.prototype['pf_lcfirst'] = function ( target, argList, argDict ) }; ParserFunctions.prototype['pf_#tag'] = function ( target, argList, argDict ) { - return [new TagTk(target, argList)]; + return [ new TagTk( target ), + argList[0].v, + new EndTagTk( target ) ]; }; // A first approximation, anyway.. diff --git a/modules/parser/ext.core.TemplateHandler.js b/modules/parser/ext.core.TemplateHandler.js index f62e1abae3..17b7644de6 100644 --- a/modules/parser/ext.core.TemplateHandler.js +++ b/modules/parser/ext.core.TemplateHandler.js @@ -168,7 +168,9 @@ TemplateHandler.prototype._expandTemplate = function ( tplExpandData ) { var prefix = target.split(':', 1)[0].toLowerCase().trim(); if ( prefix && 'pf_' + prefix in this.parserFunctions ) { var funcArg = target.substr( prefix.length + 1 ); - this.manager.env.tp( 'func prefix: ' + prefix + ' arg=' + funcArg ); + this.manager.env.tp( 'func prefix: ' + prefix + + ' args=' + JSON.stringify( tplExpandData.expandedArgs, null, 2) + + ' funcArg=' + funcArg); //this.manager.env.dp( 'entering prefix', funcArg, args ); res = this.parserFunctions[ 'pf_' + prefix ]( funcArg, tplExpandData.expandedArgs, args ); diff --git a/modules/parser/mediawiki.parser.environment.js b/modules/parser/mediawiki.parser.environment.js index f8875c217b..c24695ea28 100644 --- a/modules/parser/mediawiki.parser.environment.js +++ b/modules/parser/mediawiki.parser.environment.js @@ -116,10 +116,11 @@ MWParserEnvironment.prototype.tokensToString = function ( tokens ) { } for ( var i = 0, l = tokens.length; i < l; i++ ) { var token = tokens[i]; - if ( ! token ) { + if ( token === undefined ) { console.trace(); - this.dp( 'MWParserEnvironment.tokensToString, invalid token: ' + - JSON.stringify( token ) ); + this.tp( 'MWParserEnvironment.tokensToString, invalid token: ' + + JSON.stringify( token ) + + ' tokens:' + JSON.stringify( tokens, null, 2 )); continue; } if ( token.constructor === String ) { diff --git a/modules/parser/mediawiki.parser.js b/modules/parser/mediawiki.parser.js index 029618be83..8e10db39c4 100644 --- a/modules/parser/mediawiki.parser.js +++ b/modules/parser/mediawiki.parser.js @@ -77,7 +77,7 @@ function ParserPipeline( env, inputType ) { // Add token transformations.. new QuoteTransformer( this.tokenPostProcessor ); new PostExpandParagraphHandler( this.tokenPostProcessor ); - new Sanitizer( this.tokenPostProcessor ); + //new Sanitizer( this.tokenPostProcessor ); //var citeExtension = new Cite( this.tokenTransformer ); diff --git a/modules/parser/pegTokenizer.pegjs.txt b/modules/parser/pegTokenizer.pegjs.txt index a58a66dd2c..063aa66a57 100644 --- a/modules/parser/pegTokenizer.pegjs.txt +++ b/modules/parser/pegTokenizer.pegjs.txt @@ -29,6 +29,36 @@ return es; }; + + var flatten_string = function ( c ) { + var out = [], + text = []; + c = flatten(c); + for (var i = 0, l = c.length; i < l; i++) { + var ci = c[i]; + if (ci.constructor === String) { + if(ci !== '') { + text.push(ci); + } + } else { + if (text.length) { + out.push( text.join('') ); + text = []; + } + out.push(ci); + } + } + if (text.length) { + out.push( text.join('') ); + } + + if ( out.length === 1 && out[0].constructor === String ) { + return out[0]; + } else { + return out; + } + }; + // Remove escaped quotes from attributes etc // This was in the original PEG parser, but could not find anything in // MediaWiki that supports \' and \"-style escaping. So remove? -- gwicke @@ -308,6 +338,40 @@ urltext = ( t:[^'<~[{\n\rfghimnstw|!:\]} &=]+ { return t.join(''); } / ' ' & ':' { return "\u00a0"; } / t:text_char )+ +directive + = comment + / tplarg_or_template + / htmlentity + +spaceless_preprocessor_text + = r:( t:[^'<~[{\n\r|!\]}\t &=]+ { return t.join(''); } + / directive + / !inline_breaks !' ' text_char )+ { + return flatten_string ( r ); + } + +link_preprocessor_text + = r:( t:[^'<~[{\n\r|!\]}\t &=]+ { return t.join(''); } + / directive + / !inline_breaks no_punctuation_char + / s:[.:,] !(space / eolf) { return s } + / urlencoded_char + / [&%] )+ { + return flatten_string ( r ); + } + +// Plain text, but can contain templates, template arguments, comments etc- +// all stuff that is normally handled by the preprocessor +// Returns either a list of tokens, or a plain string (if nothing is to be +// processed). +preprocessor_text + = r:( t:[^'<~[{\n\r\t|!\]} &=]+ { return t.join(''); } + / directive + / !inline_breaks text_char )+ { + return flatten_string ( r ); + } + + /* '//', // for protocol-relative URLs, but not in text! 'ftp://', @@ -553,6 +617,7 @@ inline_element = //& { dp('inline_element enter' + input.substr(pos, 10)); return true; } & '<' ( comment / xmlish_tag ) / & '{' ( & '{{{{{' template / tplarg / template ) + / & '{' tplarg_or_template /// & '{' ( tplarg / template ) // Eat three opening brackets as text. / '[[[' { return '[[[' } @@ -632,9 +697,9 @@ urllink extlink = "[" & { return setFlag('extlink'); } - target:(url / tplarg / template) - space* - text:inlineline? + //target:urllink + target:link_preprocessor_text + text:(space* t:inlineline { return t } )? "]" { clearFlag('extlink'); if ( text == '' ) { @@ -642,13 +707,15 @@ extlink text = [ "[" + linkCount + "]" ]; linkCount++; } - return [ + var res = [ new TagTk( 'a', [ new KV('href', target), new KV('data-type', 'external') ] ), ].concat( text , [ new EndTagTk( 'a' )]); + //console.log( JSON.stringify( res, null, 2 ) ); + return res; } / "[" & { clearFlag('extlink'); return false; } @@ -713,6 +780,8 @@ ipv6_address return flatten( a ).join(''); } +tplarg_or_template = & '{{{{{' template / tplarg / template + template = "{{" target:template_param_text params:(newline? "|" newline? p:template_param { return p })*