Pluck a few low-hanging fruit in external link tokenization, and add a simple
localurl parser function implementation. 230 parser tests now passing.
https://mediawiki.org/wiki/Special:Code/MediaWiki/110834
2024-09-25 11:16:51 +00:00 · 2012-02-07 10:28:23 +00:00 · 2012-02-07 10:28:23 +00:00 · 1f6db903e9 · 2012-02-27 16:40:01 +00:00
parent cf8b7bf45d
commit 1f6db903e9
5 changed files with 27 additions and 6 deletions
--- a/modules/parser/ext.core.ParserFunctions.js
+++ b/modules/parser/ext.core.ParserFunctions.js
@@ -212,6 +212,18 @@ ParserFunctions.prototype['pf_#expr'] = function ( target, argList, argDict ) {
 	return [ res.toString() ];
 };

+ParserFunctions.prototype['pf_localurl'] = function ( target, argList, argDict ) { // Builds an index-script URL string for `target` plus query args; argDict is not used here.
+	return ( this.manager.env.wgScriptPath + '/index' +
+				this.manager.env.wgScriptExtension + '?title=' + // NOTE(review): target and the k/v pairs are concatenated as-is; no URL-encoding happens in this function.
+				target + '&' + // NOTE(review): '&' is appended unconditionally, so an empty argList leaves a trailing '&'.
+				argList.map( 
+					function( kv ) { 
+						//console.log( JSON.stringify( kv ) );
+						return (kv.v !== '' && kv.k + '=' + kv.v ) || kv.k; // 'k=v' unless kv.v is exactly '', in which case only the key is emitted
+					} 
+				).join('&') 
+		   );
+};


 /**
--- a/modules/parser/mediawiki.parser.js
+++ b/modules/parser/mediawiki.parser.js
@@ -77,7 +77,7 @@ function ParserPipeline( env, inputType ) {
 	// Add token transformations..
 	new QuoteTransformer( this.tokenPostProcessor );
 	new PostExpandParagraphHandler( this.tokenPostProcessor );
-	//new Sanitizer( this.tokenPostProcessor );
+	new Sanitizer( this.tokenPostProcessor );
 	
 	//var citeExtension = new Cite( this.tokenTransformer );

--- a/modules/parser/pegTokenizer.pegjs.txt
+++ b/modules/parser/pegTokenizer.pegjs.txt
@@ -351,11 +351,11 @@ spaceless_preprocessor_text
  }

 link_preprocessor_text
-  = r:( t:[^'<~[{\n\r|!\]}\t &=]+ { return t.join(''); }
+  = r:( t:[^'<~[{\n\r|!\]}\t &="']+ { return t.join(''); }
  / directive
+  / urlencoded_char
  / !inline_breaks no_punctuation_char
  / s:[.:,] !(space / eolf) { return s } 
-  / urlencoded_char
  / [&%] )+ {
      return flatten_string ( r );
  }
@@ -687,7 +687,8 @@ comment_chars


 urllink
-  = target:url {
+  = ! { return syntaxFlags['extlink'] }
+    target:url {
      return [ new TagTk( 'a', [new KV('href', target)] )
             , target
             , new EndTagTk( 'a' )
--- a/tests/parser/parserTests-whitelist.js
+++ b/tests/parser/parserTests-whitelist.js
@@ -59,6 +59,13 @@ testWhiteList["Table security: embedded pipes (http://lists.wikimedia.org/mailma
 // Sanitizer, but UTF8 in link might actually be ok in HTML5
 testWhiteList["External link containing double-single-quotes with no space separating the url from text in italics"] = "<p><a href=\"http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm\"><i>La muerte de Casagemas</i> (1901) en el sitio de </a><a data-type=\"internal\" href=\"Museo Picasso (París)\">Museo Picasso</a>.</p>";

+// plain percent sign is also valid in HTML5
+testWhiteList["Bug 4781, 5267: %28, %29 in URL"] = "<p><a href=\"http://www.example.com/?title=Ben-Hur_(1959_film)\" data-sourcePos=\"0:53\">http://www.example.com/?title=Ben-Hur_(1959_film)</a></p>";
+
+testWhiteList["External links: wiki links within external link (Bug 3695)"] = "<p><a href=\"http://example.com\" data-type=\"external\" data-sourcePos=\"0:54\"></a><a data-type=\"internal\" href=\"wikilink\">wikilink</a> embedded in ext link</p>";
+
+testWhiteList["Bug 4781, 5267: %25 in URL"] = "<p><a href=\"http://www.example.com/?title=100%_Bran\" data-sourcePos=\"0:41\">http://www.example.com/?title=100%_Bran</a></p>";
+

 if (typeof module == "object") {
 	module.exports.testWhiteList = testWhiteList;
--- a/tests/parser/parserTests.js
+++ b/tests/parser/parserTests.js
@@ -199,7 +199,8 @@ function ParserTests () {
 	this.env = new MWParserEnvironment({ 
 		fetchTemplates: false,
 		debug: this.argv.debug,
-		trace: this.argv.trace
+		trace: this.argv.trace,
+		wgScriptPath: ''
 	});
 }

@@ -313,7 +314,7 @@ ParserTests.prototype.normalizeHTML = function (source) {
 			// general class and titles, typically on links
 			.replace(/(title|class|rel)="[^"]+"/g, '')
 			// strip red link markup, we do not check if a page exists yet
-			.replace(/\/index.php\?title=|&amp;action=edit&amp;redlink=1/g, '')
+			.replace(/\/index.php\?title=([^']+)&amp;action=edit&amp;redlink=1/g, '$1')
 			// the expected html has some extra space in tags, strip it
 			.replace(/<a +href/g, '<a href')
 			.replace(/" +>/g, '">');